From 0474aff9fff6c80bc6e75b51b739dc2e3b201962 Mon Sep 17 00:00:00 2001 From: NevilParikh14 Date: Mon, 3 Oct 2022 15:29:31 +0530 Subject: [PATCH 01/22] Initial commit --- .circleci/config.yml | 14 +- tap_mixpanel/__init__.py | 75 +-- tap_mixpanel/client.py | 302 ++++++----- tap_mixpanel/discover.py | 28 +- tap_mixpanel/schema.py | 149 +++--- tap_mixpanel/streams.py | 454 ++++++++++++----- tap_mixpanel/sync.py | 26 +- tap_mixpanel/transform.py | 117 ++++- tests/configuration/fixtures.py | 8 +- tests/tap_tester/base.py | 244 +++++---- .../tap_tester/test_all_fields_pagination.py | 163 ------ tests/tap_tester/test_discovery.py | 142 ------ tests/tap_tester/test_mixpanel_all_fields.py | 125 +++++ ...s.py => test_mixpanel_automatic_fields.py} | 35 +- ..._bookmark.py => test_mixpanel_bookmark.py} | 100 ++-- tests/tap_tester/test_mixpanel_discovery.py | 190 +++++++ tests/tap_tester/test_mixpanel_pagination.py | 107 ++++ ...rt_date.py => test_mixpanel_start_date.py} | 148 +++--- tests/unittests/test_error_handling.py | 472 ++++++------------ tests/unittests/test_medium_client.py | 178 ++++--- .../test_request_timeout_param_value.py | 162 +++--- tests/unittests/test_support_eu_endpoints.py | 228 ++++++--- tests/unittests/test_transform_event_times.py | 44 +- 23 files changed, 2040 insertions(+), 1471 deletions(-) delete mode 100644 tests/tap_tester/test_all_fields_pagination.py delete mode 100644 tests/tap_tester/test_discovery.py create mode 100644 tests/tap_tester/test_mixpanel_all_fields.py rename tests/tap_tester/{test_automatic_fields.py => test_mixpanel_automatic_fields.py} (67%) rename tests/tap_tester/{test_bookmark.py => test_mixpanel_bookmark.py} (69%) create mode 100644 tests/tap_tester/test_mixpanel_discovery.py create mode 100644 tests/tap_tester/test_mixpanel_pagination.py rename tests/tap_tester/{test_start_date.py => test_mixpanel_start_date.py} (50%) diff --git a/.circleci/config.yml b/.circleci/config.yml index 0865a7d..d60c08a 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -15,12 +15,11 @@ jobs: pip install -U 'pip<19.2' 'setuptools<51.0.0' pip install .[dev] pip install pytest-cov - # TODO: Fails pylint a lot, skipping for now (https://stitchdata.atlassian.net/browse/SRCE-4606) - #- run: - # name: 'pylint tap' - # command: | - # source /usr/local/share/virtualenvs/tap-mixpanel/bin/activate - # pylint tap_mixpanel -d 'broad-except,chained-comparison,empty-docstring,fixme,invalid-name,line-too-long,missing-class-docstring,missing-function-docstring,missing-module-docstring,no-else-raise,no-else-return,too-few-public-methods,too-many-arguments,too-many-branches,too-many-lines,too-many-locals,ungrouped-imports,wrong-spelling-in-comment,wrong-spelling-in-docstring,too-many-public-methods' + - run: + name: 'pylint tap' + command: | + source /usr/local/share/virtualenvs/tap-mixpanel/bin/activate + pylint tap_mixpanel -d 'broad-except,chained-comparison,empty-docstring,fixme,invalid-name,line-too-long,missing-module-docstring,no-else-raise,no-else-return,too-few-public-methods,too-many-arguments,too-many-branches,too-many-lines,too-many-locals,ungrouped-imports,too-many-public-methods,protected-access,too-many-statements,not-an-iterable' - run: name: 'JSON Validator' command: | @@ -30,7 +29,8 @@ jobs: name: 'Unit Tests' command: | source /usr/local/share/virtualenvs/tap-mixpanel/bin/activate - python -m pytest --junitxml=junit/test-result.xml --cov=tap_mixpanel --cov-report=html tests/unittests/ + pip install coverage parameterized + python -m pytest 
--junitxml=junit/test-result.xml --cov=tap_mixpanel --cov-report=html tests/unittests/ - store_test_results: path: test_output/report.xml - store_artifacts: diff --git a/tap_mixpanel/__init__.py b/tap_mixpanel/__init__.py index 802d95a..00c5e40 100644 --- a/tap_mixpanel/__init__.py +++ b/tap_mixpanel/__init__.py @@ -1,12 +1,13 @@ #!/usr/bin/env python3 -import sys import json -import argparse -from datetime import datetime, timedelta, date +import sys +from datetime import timedelta + import singer -from singer import metadata, utils -from singer.utils import strptime_to_utc, strftime +from singer import utils +from singer.utils import strftime, strptime_to_utc + from tap_mixpanel.client import MixpanelClient from tap_mixpanel.discover import discover from tap_mixpanel.sync import sync @@ -15,29 +16,39 @@ REQUEST_TIMEOUT = 300 REQUIRED_CONFIG_KEYS = [ - 'project_timezone', - 'api_secret', - 'attribution_window', - 'start_date', - 'user_agent' + "project_timezone", + "api_secret", + "attribution_window", + "start_date", + "user_agent", ] def do_discover(client, properties_flag): - LOGGER.info('Starting discover') + """Call the discovery function. + + Args: + client (MixpanelClient): Client object to make http calls. + properties_flag (str): Setting this argument to `true` ensures that new properties on + events and engage records are captured. + """ + LOGGER.info("Starting discover") catalog = discover(client, properties_flag) json.dump(catalog.to_dict(), sys.stdout, indent=2) - LOGGER.info('Finished discover') + LOGGER.info("Finished discover") @singer.utils.handle_top_exception(LOGGER) def main(): + """ + Run discover mode or sync mode. + """ parsed_args = singer.utils.parse_args(REQUIRED_CONFIG_KEYS) - start_date = parsed_args.config['start_date'] + start_date = parsed_args.config["start_date"] # Set request timeout to config param `request_timeout` value. # If value is 0, "0", "" or not passed then it sets default to 300 seconds. 
- config_request_timeout = parsed_args.config.get('request_timeout') + config_request_timeout = parsed_args.config.get("request_timeout") if config_request_timeout and float(config_request_timeout): request_timeout = float(config_request_timeout) else: @@ -45,43 +56,47 @@ def main(): start_dttm = strptime_to_utc(start_date) now_dttm = utils.now() - if parsed_args.config.get('end_date'): - now_dttm = strptime_to_utc(parsed_args.config.get('end_date')) + if parsed_args.config.get("end_date"): + now_dttm = strptime_to_utc(parsed_args.config.get("end_date")) delta_days = (now_dttm - start_dttm).days if delta_days >= 365: delta_days = 365 start_date = strftime(now_dttm - timedelta(days=delta_days)) - LOGGER.warning("start_date greater than 1 year maxiumum for API.") + LOGGER.warning("start_date greater than 1 year maximum for API.") LOGGER.warning("Setting start_date to 1 year ago, %s", start_date) - #Check support for EU endpoints - if str(parsed_args.config.get('eu_residency')).lower() == "true": + # Check support for EU endpoints + if str(parsed_args.config.get("eu_residency")).lower() == "true": api_domain = "eu.mixpanel.com" else: api_domain = "mixpanel.com" - with MixpanelClient(parsed_args.config['api_secret'], - api_domain, - request_timeout, - parsed_args.config['user_agent']) as client: + with MixpanelClient( + parsed_args.config["api_secret"], + api_domain, + request_timeout, + parsed_args.config["user_agent"], + ) as client: state = {} if parsed_args.state: state = parsed_args.state config = parsed_args.config - properties_flag = config.get('select_properties_by_default') + properties_flag = config.get("select_properties_by_default") if parsed_args.discover: client.__api_domain = api_domain do_discover(client, properties_flag) elif parsed_args.catalog: - sync(client=client, - config=config, - catalog=parsed_args.catalog, - state=state, - start_date=start_date) + sync( + client=client, + config=config, + catalog=parsed_args.catalog, + state=state, + start_date=start_date, + ) -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/tap_mixpanel/client.py b/tap_mixpanel/client.py index 7f3d254..5bc97dc 100644 --- a/tap_mixpanel/client.py +++ b/tap_mixpanel/client.py @@ -1,11 +1,10 @@ import base64 -import io import backoff import jsonlines import requests import singer -from requests.exceptions import ConnectionError, Timeout +from requests.exceptions import Timeout from singer import metrics LOGGER = singer.get_logger() @@ -15,112 +14,127 @@ class ReadTimeoutError(Exception): - pass + """Custom error for request timeout.""" class Server5xxError(Exception): - pass + """Custom error class for all the 5xx error.""" class Server429Error(Exception): - pass + """Custom error class for rate limit exceeded.""" class MixpanelError(Exception): - pass + """Custom error class for all the Mixpanel errors.""" class MixpanelBadRequestError(MixpanelError): - pass + """Custom error class for bad request.""" class MixpanelUnauthorizedError(MixpanelError): - pass + """Custom error class for authorization.""" class MixpanelPaymentRequiredError(MixpanelError): - pass + """Custom error if API call require payment.""" class MixpanelNotFoundError(MixpanelError): - pass + """Custom error class for not found error.""" class MixpanelForbiddenError(MixpanelError): - pass + """Custom error class for forbidden error.""" class MixpanelInternalServiceError(Server5xxError): - pass + """Custom error class for internal server error.""" +# Custom errors with respective messages mapped by error code. 
ERROR_CODE_EXCEPTION_MAPPING = { 400: { "raise_exception": MixpanelBadRequestError, - "message": "A validation exception has occurred." + "message": "A validation exception has occurred.", }, 401: { "raise_exception": MixpanelUnauthorizedError, - "message": "Invalid authorization credentials." + "message": "Invalid authorization credentials.", }, 402: { "raise_exception": MixpanelPaymentRequiredError, - "message": "Your current plan does not allow API calls. Payment is required to complete the operation." + "message": "Your current plan does not allow API calls. Payment is required to complete the operation.", }, 403: { "raise_exception": MixpanelForbiddenError, - "message": "User does not have permission to access the resource." + "message": "User does not have permission to access the resource.", }, 404: { "raise_exception": MixpanelNotFoundError, - "message": "The resource you have specified cannot be found." + "message": "The resource you have specified cannot be found.", }, 429: { "raise_exception": Server429Error, - "message": "The API rate limit for your organisation/application pairing has been exceeded." + "message": "The API rate limit for your organization/application pairing has been exceeded.", }, 500: { "raise_exception": MixpanelInternalServiceError, - "message": "Server encountered an unexpected condition that prevented it from fulfilling the request." - } + "message": "Server encountered an unexpected condition that prevented it from fulfilling the request.", + }, } + def raise_for_error(response): - LOGGER.error('ERROR %s: %s, REASON: %s', response.status_code, - response.text, - response.reason) + """Retrieve the error code and the error message from the response + and raises custom exceptions accordingly. + + Args: + response (requests.Response): Response with error code. + + Raises: + exc: Custom exception prepared according to status code. + """ + LOGGER.error( + "ERROR %s: %s, REASON: %s", response.status_code, response.text, response.reason + ) try: response_json = response.json() except Exception: response_json = {} error_code = response.status_code error_message = response_json.get( - "error", response_json.get( - "message", ERROR_CODE_EXCEPTION_MAPPING.get( - error_code, {}).get( - "message", "Unknown Error"))) - - # if response text contains something unusual error of to_date then provide helper message of timezone mismatch + "error", + response_json.get( + "message", + ERROR_CODE_EXCEPTION_MAPPING.get(error_code, {}).get( + "message", "Unknown Error" + ), + ), + ) + + # If response text contains something unusual error of to_date then provide helper message of timezone mismatch # E.g error: to_date cannot be later than today if error_code == 400: if "to_date" in response.text: error_message += " Please validate the timezone with the MixPanel UI under project settings." 
else: - error_message = '{}(Please verify your credentials.)'.format(error_message) + error_message = f"{error_message}(Please verify your credentials.)" - message = "HTTP-error-code: {}, Error: {}".format(error_code, error_message) + message = f"HTTP-error-code: {error_code}, Error: {error_message}" - exc = ERROR_CODE_EXCEPTION_MAPPING.get( - error_code, {}).get("raise_exception", MixpanelError) + exc = ERROR_CODE_EXCEPTION_MAPPING.get(error_code, {}).get( + "raise_exception", MixpanelError + ) raise exc(message) from None -class MixpanelClient(object): - def __init__(self, - api_secret, - api_domain, - request_timeout, - user_agent=None): +class MixpanelClient: + """ + The client class used for making REST calls to the Mixpanel API. + """ + def __init__(self, api_secret, api_domain, request_timeout, user_agent=None): self.__api_secret = api_secret self.__api_domain = api_domain self.__request_timeout = request_timeout @@ -136,63 +150,95 @@ def __enter__(self): def __exit__(self, exception_type, exception_value, traceback): self.__session.close() - @backoff.on_exception(backoff.expo, - (Server5xxError, Server429Error, ReadTimeoutError, ConnectionError, Timeout), - max_tries=5, - factor=2) + @backoff.on_exception( + backoff.expo, + (Server5xxError, Server429Error, ReadTimeoutError, ConnectionError, Timeout), + max_tries=5, + factor=2, + ) def check_access(self): + """Call rest API to verify user's credentials. + + Raises: + Exception: Raises if response is not success. + ReadTimeoutError: Raises if requests timeout. + + Returns: + bool: Returns true if credentials are verified. + (else raises Exception) + """ if self.__api_secret is None: - raise Exception('Error: Missing api_secret in tap config.json.') + raise Exception("Error: Missing api_secret in tap config.json.") headers = {} # Endpoint: simple API call to return a single record (org settings) to test access - url = 'https://{}/api/2.0/engage'.format(self.__api_domain) + url = f"https://{self.__api_domain}/api/2.0/engage" if self.__user_agent: - headers['User-Agent'] = self.__user_agent - headers['Accept'] = 'application/json' - headers['Authorization'] = 'Basic {}'.format( - str(base64.urlsafe_b64encode(self.__api_secret.encode("utf-8")), "utf-8")) + headers["User-Agent"] = self.__user_agent + headers["Accept"] = "application/json" + headers[ + "Authorization" + ] = f"Basic {str(base64.urlsafe_b64encode(self.__api_secret.encode('utf-8')), 'utf-8')}" try: response = self.__session.get( url=url, - timeout=self.__request_timeout, # Request timeout parameter - headers=headers) + timeout=self.__request_timeout, # Request timeout parameter + headers=headers, + ) except requests.exceptions.Timeout as err: - LOGGER.error('TIMEOUT ERROR: %s',str(err)) - raise ReadTimeoutError + LOGGER.error("TIMEOUT ERROR: %s", str(err)) + raise ReadTimeoutError from None if response.status_code == 402: # 402 Payment Requirement does not indicate a permissions or authentication error self.disable_engage_endpoint = True - LOGGER.warning('Mixpanel returned a 402 from the Engage API. Engage stream will be skipped.') + LOGGER.warning( + "Mixpanel returned a 402 from the Engage API. Engage stream will be skipped." 
+ ) return True elif response.status_code != 200: - LOGGER.error('Error status_code = {}'.format(response.status_code)) + LOGGER.error("Error status_code = %s", response.status_code) raise_for_error(response) - else: - return True + return True @backoff.on_exception( backoff.expo, (Server5xxError, Server429Error, ReadTimeoutError, ConnectionError, Timeout), max_tries=BACKOFF_MAX_TRIES_REQUEST, factor=3, - logger=LOGGER) - def perform_request(self, - method, - url=None, - params=None, - json=None, - stream=False, - **kwargs): + logger=LOGGER, + ) + def perform_request( + self, method, url=None, params=None, json=None, stream=False, **kwargs + ): + """Call rest API and return the response in case of status code 200. + + Args: + method (str): GET or POST method. + url (str, optional): Complete url for the stream. Defaults to None. + params (dict, optional): Query params. Defaults to None. + json (dict, optional): JSON data (For POST request). Defaults to None. + stream (bool, optional): If False, a response transfers indicating that + the file should download immediately. If True, stream the file. + Defaults to False. + + Raises: + Server5xxError: Raises if status code > 500 + ReadTimeoutError: Raises if request timeouts. + + Returns: + dict: With status code 200, returns JSON formatted response. + """ try: - response = self.__session.request(method=method, - url=url, - params=params, - json=json, - stream=stream, - timeout=self.__request_timeout, # Request timeout parameter - **kwargs) + response = self.__session.request( + method=method, + url=url, + params=params, + json=json, + stream=stream, + timeout=self.__request_timeout, # Request timeout parameter + **kwargs, + ) if response.status_code > 500: raise Server5xxError() @@ -201,89 +247,111 @@ def perform_request(self, raise_for_error(response) return response except requests.exceptions.Timeout as err: - LOGGER.error('TIMEOUT ERROR: %s',str(err)) - raise ReadTimeoutError(err) + LOGGER.error("TIMEOUT ERROR: %s", str(err)) + raise ReadTimeoutError(err) from None def request(self, method, url=None, path=None, params=None, json=None, **kwargs): + """Request method to return JSON response of HTTP call. + + Args: + method (str): GET or POST method. + url (str, optional): Base URL. Defaults to None. + path (str, optional): Path for the stream. Defaults to None. + params (dict, optional): Query params. Defaults to None. + json (dict, optional): JSON data (For POST requests). Defaults to None. + + Returns: + dict: JSON object of response. 
+ """ if not self.__verified: self.__verified = self.check_access() if url and path: - url = '{}/{}'.format(url, path) + url = f"{url}/{path}" elif path and not url: - url = 'https://{}/api/2.0/{}'.format(self.__api_domain, path) + url = f"https://{self.__api_domain}/api/2.0/{path}" - if 'endpoint' in kwargs: - endpoint = kwargs['endpoint'] - del kwargs['endpoint'] + if "endpoint" in kwargs: + endpoint = kwargs["endpoint"] + del kwargs["endpoint"] else: endpoint = None - if 'headers' not in kwargs: - kwargs['headers'] = {} + if "headers" not in kwargs: + kwargs["headers"] = {} - kwargs['headers']['Accept'] = 'application/json' + kwargs["headers"]["Accept"] = "application/json" if self.__user_agent: - kwargs['headers']['User-Agent'] = self.__user_agent + kwargs["headers"]["User-Agent"] = self.__user_agent - if method == 'POST': - kwargs['headers']['Content-Type'] = 'application/json' + if method == "POST": + kwargs["headers"]["Content-Type"] = "application/json" - kwargs['headers']['Authorization'] = 'Basic {}'.format( - str(base64.urlsafe_b64encode(self.__api_secret.encode("utf-8")), "utf-8")) + kwargs["headers"][ + "Authorization" + ] = f"Basic {str(base64.urlsafe_b64encode(self.__api_secret.encode('utf-8')), 'utf-8')}" with metrics.http_request_timer(endpoint) as timer: - response = self.perform_request(method=method, - url=url, - params=params, - json=json, - **kwargs) + response = self.perform_request( + method=method, url=url, params=params, json=json, **kwargs + ) timer.tags[metrics.Tag.http_status_code] = response.status_code response_json = response.json() return response_json - def request_export(self, method, url=None, path=None, params=None, json=None, **kwargs): + def request_export( + self, method, url=None, path=None, params=None, json=None, **kwargs + ): + """Method to read jsonline from export stream response. + + Args: + method (str): HTTP request method. + url (str, optional): Base URL for the export endpoint. Defaults to None. + path (str, optional): Path to the stream(export). Defaults to None. + params (dict, optional): Request calls params. Defaults to None. + json (dict, optional): JSON data (For POST request). Defaults to None. + + Yields: + dict: Records of export stream. 
+ """ if not self.__verified: self.__verified = self.check_access() if url and path: - url = '{}/{}'.format(url, path) + url = f"{url}/{path}" elif path and not url: - url = 'https://{}/api/2.0/{}'.format(self.__api_domain, path) + url = f"https://{self.__api_domain}/api/2.0/{path}" - if 'endpoint' in kwargs: - endpoint = kwargs['endpoint'] - del kwargs['endpoint'] + if "endpoint" in kwargs: + endpoint = kwargs["endpoint"] + del kwargs["endpoint"] else: - endpoint = 'export' + endpoint = "export" - if 'headers' not in kwargs: - kwargs['headers'] = {} + if "headers" not in kwargs: + kwargs["headers"] = {} - kwargs['headers']['Accept'] = 'application/json' + kwargs["headers"]["Accept"] = "application/json" if self.__user_agent: - kwargs['headers']['User-Agent'] = self.__user_agent + kwargs["headers"]["User-Agent"] = self.__user_agent - if method == 'POST': - kwargs['headers']['Content-Type'] = 'application/json' + if method == "POST": + kwargs["headers"]["Content-Type"] = "application/json" - kwargs['headers']['Authorization'] = 'Basic {}'.format( - str(base64.urlsafe_b64encode(self.__api_secret.encode("utf-8")), "utf-8")) + kwargs["headers"][ + "Authorization" + ] = f"Basic {str(base64.urlsafe_b64encode(self.__api_secret.encode('utf-8')), 'utf-8')}" with metrics.http_request_timer(endpoint) as timer: - response = self.perform_request(method=method, - url=url, - params=params, - json=json, - stream=True, - **kwargs) + response = self.perform_request( + method=method, url=url, params=params, json=json, stream=True, **kwargs + ) timer.tags[metrics.Tag.http_status_code] = response.status_code - # export endpoint returns jsonl results; - # other endpoints return json with array of results + # 'export' endpoint returns jsonl results; + # Other endpoints return json with array of results # jsonlines reference: https://jsonlines.readthedocs.io/en/latest/ reader = jsonlines.Reader(response.iter_lines()) - for record in reader.iter(allow_none=True, skip_empty=True): - yield record + yield from reader.iter(allow_none=True, skip_empty=True) diff --git a/tap_mixpanel/discover.py b/tap_mixpanel/discover.py index dfd3526..498b988 100644 --- a/tap_mixpanel/discover.py +++ b/tap_mixpanel/discover.py @@ -1,8 +1,20 @@ from singer.catalog import Catalog, CatalogEntry, Schema + from tap_mixpanel.schema import get_schemas from tap_mixpanel.streams import STREAMS + def discover(client, properties_flag): + """Run the discovery mode, prepare the catalog file and return catalog. + + Args: + client (MixpanelClient): Client object to make http calls. + properties_flag (str): Setting this argument to `true` ensures that new properties on + events and engage records are captured. + + Returns: + singer.Catalog: Catalog object having schema and metadata of all the streams. 
+ """ schemas, field_metadata = get_schemas(client, properties_flag) catalog = Catalog([]) @@ -10,12 +22,14 @@ def discover(client, properties_flag): schema = Schema.from_dict(schema_dict) mdata = field_metadata[stream_name] - catalog.streams.append(CatalogEntry( - stream=stream_name, - tap_stream_id=stream_name, - key_properties=STREAMS[stream_name].key_properties, - schema=schema, - metadata=mdata - )) + catalog.streams.append( + CatalogEntry( + stream=stream_name, + tap_stream_id=stream_name, + key_properties=STREAMS[stream_name].key_properties, + schema=schema, + metadata=mdata, + ) + ) return catalog diff --git a/tap_mixpanel/schema.py b/tap_mixpanel/schema.py index 30a8506..8b3bcb0 100644 --- a/tap_mixpanel/schema.py +++ b/tap_mixpanel/schema.py @@ -1,22 +1,45 @@ -import os import json +import os + +import singer from singer import metadata + from tap_mixpanel.streams import STREAMS -import singer LOGGER = singer.get_logger() # Reference: # https://github.com/singer-io/getting-started/blob/master/docs/DISCOVERY_MODE.md#Metadata + def get_abs_path(path): + """Get the absolute path for the schema files. + + Args: + path (str): Path from current folder to schema file. + + Returns: + str: Full path to schema file. + """ return os.path.join(os.path.dirname(os.path.realpath(__file__)), path) def get_schema(client, properties_flag, stream_name): - schema_path = get_abs_path('schemas/{}.json'.format(stream_name)) + """Creates schema for a stream by loading schema file and appending dynamic + fields schema if necessary. + + Args: + client (MixpanelClient): Client to make http calls. + properties_flag (str): Setting this argument to `true` ensures that new properties on + events and engage records are captured. + stream_name (str): Name of stream whose schema is to create. + + Returns: + dict: Returns schema of the stream. + """ + schema_path = get_abs_path(f"schemas/{stream_name}.json") - with open(schema_path) as file: + with open(schema_path, encoding="utf-8") as file: schema = json.load(file) # Set whether to allow additional properties for engage and export endpoints @@ -24,58 +47,45 @@ def get_schema(client, properties_flag, stream_name): # when the Event or Engage (user/person) was created. # Depending on the tap config parameter select_properties_by_default, # the json schema should allow additional properties (additionalProperties = true). 
- if stream_name in ('engage', 'export') and str(properties_flag).lower() == 'true': - schema['additionalProperties'] = True + if stream_name in ("engage", "export") and str(properties_flag).lower() == "true": + schema["additionalProperties"] = True else: - schema['additionalProperties'] = False + schema["additionalProperties"] = False - if stream_name == 'engage': + if stream_name == "engage": properties = client.request( - method='GET', - url='https://{}/api/2.0'.format(client.__api_domain), - path='engage/properties', - params={'limit': 2000}, - endpoint='engage_properties') - if properties.get('status') == 'ok': - results = properties.get('results', {}) + method="GET", + url=f"https://{client.__api_domain}/api/2.0", + path="engage/properties", + params={"limit": 2000}, + endpoint="engage_properties", + ) + if properties.get("status") == "ok": + results = properties.get("results", {}) for key, val in results.items(): - if key[0:1] == '$': - new_key = 'mp_reserved_{}'.format(key[1:]) + if key[0:1] == "$": + new_key = f"mp_reserved_{key[1:]}" else: new_key = key # property_type: string, number, boolean, datetime, object, list # Reference: # https://help.mixpanel.com/hc/en-us/articles/115004547063-Properties-Supported-Data-Types - property_type = val.get('type') + property_type = val.get("type") types = { - 'boolean': { - 'type': ['null', 'boolean'] - }, - 'number': { - 'type': ['null', 'string'], - 'format': 'singer.decimal' - }, - 'datetime': { - 'type': ['null', 'string'], - 'format': 'date-time' - }, - 'object': { - 'type': ['null', 'object'], - 'additionalProperties': True - }, - 'list': { - 'type': ['null', 'array'], - 'required': False, - 'items': {} + "boolean": {"type": ["null", "boolean"]}, + "number": {"type": ["null", "string"], "format": "singer.decimal"}, + "datetime": {"type": ["null", "string"], "format": "date-time"}, + "object": { + "type": ["null", "object"], + "additionalProperties": True, }, - 'string': { - 'type': ['null', 'string'] - } + "list": {"type": ["null", "array"], "required": False, "items": {}}, + "string": {"type": ["null", "string"]}, } - if property_type in types.keys(): + if property_type in types: # Make the types a list containing all types starting with the one returned to us by the API this_type = [types.pop(property_type)] this_type += list(types.values()) @@ -83,40 +93,52 @@ def get_schema(client, properties_flag, stream_name): else: this_type = list(types.values()) + schema["properties"][new_key] = {"anyOf": this_type} - schema['properties'][new_key] = {'anyOf': this_type} - - if stream_name == 'export': + if stream_name == "export": # Event properties endpoint: # https://developer.mixpanel.com/docs/data-export-api#section-hr-span-style-font-family-courier-top-span results = client.request( - method='GET', - url='https://{}/api/2.0'.format(client.__api_domain), - path='events/properties/top', - params={'limit': 2000}, - endpoint='event_properties') + method="GET", + url=f"https://{client.__api_domain}/api/2.0", + path="events/properties/top", + params={"limit": 2000}, + endpoint="event_properties", + ) for key, val in results.items(): - if key[0:1] == '$': - new_key = 'mp_reserved_{}'.format(key[1:]) + if key[0:1] == "$": + new_key = f"mp_reserved_{key[1:]}" else: new_key = key - # string ONLY for event properties (no other datatypes) + # String ONLY for event properties (no other datatypes) # Reference: https://help.mixpanel.com/hc/en-us/articles/360001355266-Event-Properties#field-size-character-limits-for-event-properties - 
schema['properties'][new_key] = { - 'type': ['null', 'string'] - } + schema["properties"][new_key] = {"type": ["null", "string"]} return schema + def get_schemas(client, properties_flag): + """Load the schema references, prepare metadata for each streams and return + schema and metadata for the catalog. + + Args: + client (MixpanelClient): Client object to make http calls. + properties_flag (bool): Setting this argument to true ensures that new properties on + events and engage records are captured. + + Returns: + tuple: Returns tuple of Schemas and metadata. + """ schemas = {} field_metadata = {} for stream_name, stream_metadata in STREAMS.items(): # When the client detects disable_engage_endpoint, skip discovering the stream - if stream_name == 'engage' and client.disable_engage_endpoint: - LOGGER.warning('Mixpanel returned a 402 indicating the Engage endpoint and stream is unavailable. Skipping.') + if stream_name == "engage" and client.disable_engage_endpoint: + LOGGER.warning( + "Mixpanel returned a 402 indicating the Engage endpoint and stream is unavailable. Skipping." + ) continue schema = get_schema(client, properties_flag, stream_name) @@ -132,17 +154,18 @@ def get_schemas(client, properties_flag): schema=schema, key_properties=stream_metadata.key_properties, valid_replication_keys=stream_metadata.replication_keys, - replication_method=stream_metadata.replication_method + replication_method=stream_metadata.replication_method, ) mdata = metadata.to_map(mdata) if stream_metadata.replication_keys: - mdata = metadata.write( - mdata, - ('properties', stream_metadata.replication_keys[0]), - 'inclusion', - 'automatic') + mdata = metadata.write( + mdata, + ("properties", stream_metadata.replication_keys[0]), + "inclusion", + "automatic", + ) mdata = metadata.to_list(mdata) diff --git a/tap_mixpanel/streams.py b/tap_mixpanel/streams.py index c57cd60..e4cfd69 100644 --- a/tap_mixpanel/streams.py +++ b/tap_mixpanel/streams.py @@ -1,6 +1,4 @@ -""" -This module defines the stream classes and their individual sync logic. -""" +"""This module defines the stream classes and their individual sync logic.""" import json import math @@ -12,7 +10,7 @@ from singer.utils import strptime_to_utc from tap_mixpanel.client import MixpanelClient -from tap_mixpanel.transform import transform_record, transform_datetime +from tap_mixpanel.transform import transform_datetime, transform_record LOGGER = singer.get_logger() @@ -22,6 +20,7 @@ class MixPanel: A base class representing singer streams. :param client: The API client used to extract records from external source """ + tap_stream_id = None replication_method = None replication_keys = [] @@ -43,20 +42,47 @@ def __init__(self, client: MixpanelClient): self.client = client def write_schema(self, catalog, stream_name): + """Writes the schema of the stream form the catalog. + + Args: + catalog (singer.Catalog): Catalog object having schema and metadata of all the streams. + stream_name (str): Name of the syncing stream. + + Raises: + err: Raises if any error occur while writing schema. + """ stream = catalog.get_stream(stream_name) schema = stream.schema.to_dict() try: singer.write_schema(stream_name, schema, stream.key_properties) except OSError as err: - LOGGER.error("OS Error writing schema for: %s",stream_name) + LOGGER.error("OS Error writing schema for: %s", stream_name) raise err def get_bookmark(self, state, stream, default): + """Get the bookmark value from the state if available in the state. + Else return start date. 
+
+ Args:
+ state (dict): State containing bookmarks of the streams if available.
+ stream (str): Name of the stream to get the bookmark.
+ default (str): Default value (start_date) to return if the bookmark is not available.
+
+ Returns:
+ str: Returns bookmark value.
+ """
 if (state is None) or ("bookmarks" not in state):
 return default
 return state.get("bookmarks", {}).get(stream, default)
 def write_bookmark(self, state, stream, value):
+ """Updates the stream bookmark value in the state and writes the state.
+
+ Args:
+ state (dict): State containing bookmarks of the streams if available.
+ stream (str): Name of stream whose bookmark will be written.
+ value (str): Bookmark value of the stream.
+ """
 if "bookmarks" not in state:
 state["bookmarks"] = {}
 state["bookmarks"][stream] = value
@@ -73,6 +99,25 @@ def process_records(
 max_bookmark_value=None,
 last_datetime=None,
 ):
+ """Transform each record as per the schema and write it if its replication value > bookmark.
+
+ Args:
+ stream_name (str): Name of the syncing stream.
+ records (list): Records to be written.
+ time_extracted (datetime): Datetime when the data was extracted from the API.
+ bookmark_field (str, optional): Bookmark field in the state if stream is INCREMENTAL.
+ Defaults to None.
+ max_bookmark_value (str, optional): Maximum bookmark value of the written records if a replication key
+ is available. Defaults to None.
+ last_datetime (str, optional): Last datetime; only records with a greater replication value will be written.
+ Defaults to None.
+
+ Raises:
+ err: Raises exception if transformation error occurs.
+
+ Returns:
+ tuple: Tuple of the maximum bookmark value among written records and the written record count.
+ """
 stream = catalog.get_stream(stream_name)
 schema = stream.schema.to_dict()
 stream_metadata = metadata.to_map(stream.metadata)
@@ -86,9 +131,10 @@ def process_records(
 record, schema, stream_metadata
 )
 except Exception as err:
- LOGGER.error("Error: %s",str(err))
- LOGGER.error("For schema: %s",
- json.dumps(schema, sort_keys=True, indent=2)
+ LOGGER.error("Error: %s", str(err))
+ LOGGER.error(
+ "For schema: %s",
+ json.dumps(schema, sort_keys=True, indent=2),
 )
 raise err
@@ -123,34 +169,67 @@ def process_records(
 return max_bookmark_value, counter.value
 def get_and_transform_records(
- self, querystring, project_timezone, max_bookmark_value, catalog, last_datetime, endpoint_total,
- limit, total_records, parent_total, record_count, page, offset, parent_record, date_total):
- """
- Get the records using the client get request and transform it using transform_records
- and return the max_bookmark_value
+ self,
+ querystring,
+ project_timezone,
+ max_bookmark_value,
+ catalog,
+ last_datetime,
+ endpoint_total,
+ limit,
+ total_records,
+ parent_total,
+ record_count,
+ page,
+ offset,
+ parent_record,
+ date_total,
+ ):
+ """Get the records using the client get request, transform them using
+ transform_record, and return the max_bookmark_value.
+
+ Args:
+ querystring (str): Params in URL query format to join with the stream path.
+ project_timezone (str): Time zone in which integer date times are stored.
+ max_bookmark_value (str): Maximum bookmark value among written records.
+ catalog (singer.Catalog): Catalog object having schema and metadata of all the streams.
+ last_datetime (str): Last datetime; only records with a greater replication value will be written.
+ endpoint_total (int): Total number of records written so far.
+ limit (int): Page size.
+ total_records (int): Total number of records available for the sync.
+ parent_total (int): Total records for parent ID + record_count (int): Number of records per page written by tap. + page (int): Page count. + offset (int): Offset value of stream data for the pagination. + parent_record (dict): Record of parent stream. + date_total (int): Total records written for the date window. + + Raises: + Exception: Raises if any key-property is missing. + + Returns: + tuple: Returns tuple of parent_total, date_total, offset, page, session_id, + endpoint_total, max_bookmark_value, total_records """ session_id = None data = self.client.request( - method='GET', + method="GET", url=self.url, path=self.path, params=querystring, - endpoint=self.tap_stream_id) + endpoint=self.tap_stream_id, + ) # time_extracted: datetime when the data was extracted from the API time_extracted = utils.now() - full_url = '{}/{}{}'.format( - self.url, - self.path, - '?{}'.format(querystring) if querystring else '') + full_url = f"{self.url}/{self.path}{f'?{querystring}' if querystring else ''}" if not data: - LOGGER.info('No data for URL: %s',full_url) + LOGGER.info("No data for URL: %s", full_url) # No data results - else: # has data + else: # Has data # Transform data with transform_json from transform.py # The data_key identifies the array/list of records below the element - # LOGGER.info('data = {}'.format(data)) # TESTING, comment out transformed_data = [] # initialize the record list # Endpoints: funnels, revenue return results as dictionary for each date @@ -159,10 +238,10 @@ def get_and_transform_records( results = {} results_list = [] for key, val in data[self.data_key].items(): - # skip $overall summary - if key != '$overall': - val['date'] = key - val['datetime'] = '{}T00:00:00Z'.format(key) + # Skip $overall summary + if key != "$overall": + val["date"] = key + val["datetime"] = f"{key}T00:00:00Z" results_list.append(val) results[self.data_key] = results_list data = results @@ -170,35 +249,32 @@ def get_and_transform_records( # Cohorts endpoint returns results as a list/array (no data_key) # All other endpoints have a data_key if not self.data_key: - self.data_key = 'results' - new_data = { - 'results': data - } + self.data_key = "results" + new_data = {"results": data} data = new_data transformed_data = [] # Loop through result records for record in data[self.data_key]: - # transform record and append to transformed_data array - transformed_record = transform_record(record, - self.tap_stream_id, - project_timezone, - parent_record) + # Transform record and append to transformed_data array + transformed_record = transform_record( + record, self.tap_stream_id, project_timezone, parent_record + ) transformed_data.append(transformed_record) # Check for missing keys for key in self.key_properties: val = transformed_record.get(key) if not val: - LOGGER.error('Error: Missing Key') - raise 'Missing Key' + LOGGER.error("Error: Missing Key") + raise Exception("Missing Key") # End data record loop if not transformed_data: - LOGGER.info('No transformed data for data = %s', data) - # No transformed data results - else: # has transformed data + LOGGER.info("No transformed data for data = %s", data) + # No transformed data results + else: # Has transformed data # Process records and get the max_bookmark_value and record_count max_bookmark_value, record_count = self.process_records( catalog=catalog, @@ -207,47 +283,73 @@ def get_and_transform_records( time_extracted=time_extracted, bookmark_field=next(iter(self.replication_keys), None), max_bookmark_value=max_bookmark_value, - 
last_datetime=last_datetime) - LOGGER.info('Stream %s, batch processed %s records', self.tap_stream_id, record_count) + last_datetime=last_datetime, + ) + LOGGER.info( + "Stream %s, batch processed %s records", + self.tap_stream_id, + record_count, + ) - # set total_records and pagination fields + # Set total_records and pagination fields if page == 0: if isinstance(data, dict): - total_records = data.get('total', record_count) + total_records = data.get("total", record_count) else: total_records = record_count parent_total = parent_total + record_count date_total = date_total + record_count endpoint_total = endpoint_total + record_count if isinstance(data, dict): - session_id = data.get('session_id', None) + session_id = data.get("session_id", None) # to_rec: to record; ending record for the batch page if self.pagination: to_rec = offset + limit - if to_rec > total_records: - to_rec = total_records + to_rec = min(to_rec, total_records) else: to_rec = record_count - LOGGER.info('Synced Stream: %s, page: %s, %s to %s of total: %s', - self.tap_stream_id, - page, - offset, - to_rec, - total_records) + LOGGER.info( + "Synced Stream: %s, page: %s, %s to %s of total: %s", + self.tap_stream_id, + page, + offset, + to_rec, + total_records, + ) # End has transformed data # End has data results # Pagination: increment the offset by the limit (batch-size) and page offset = offset + limit page = page + 1 - return parent_total, date_total, offset, page, session_id, endpoint_total, max_bookmark_value, total_records - - def define_bookmark_filters(self, days_interval, last_datetime, now_datetime, attribution_window): - """ - define the params from and to according to the filters provided in - the bookmark_query_field_from and bookmark_query_field_to + return ( + parent_total, + date_total, + offset, + page, + session_id, + endpoint_total, + max_bookmark_value, + total_records, + ) + + def define_bookmark_filters( + self, days_interval, last_datetime, now_datetime, attribution_window + ): + """Define the params from and to according to the filters provided in + the bookmark_query_field_from and bookmark_query_field_to. + + Args: + days_interval (int): Interval in days between start_window and end_window + last_datetime (str): Last datetime from records will be fetched. + now_datetime (datetime): Current datetime from sync started. + attribution_window (int): Latency minimum number of days to look-back to + account for delays in attributing accurate results. 
+
+ Returns:
+ tuple: Returns tuple of start_window, end_window and days_interval.
 """
 if self.bookmark_query_field_from and self.bookmark_query_field_to:
 # days_interval from config date_window_size, default = 60; passed to function from sync
@@ -258,57 +360,69 @@ def define_bookmark_filters(self, days_interval, last_datetime, now_datetime, at
 delta_days = (now_datetime - last_dttm).days
 if delta_days <= attribution_window:
 delta_days = attribution_window
- LOGGER.info("Start bookmark less than %s day attribution window.", attribution_window)
+ LOGGER.info(
+ "Start bookmark less than %s day attribution window.",
+ attribution_window,
+ )
 elif delta_days >= 365:
 delta_days = 365
- LOGGER.warning("Start date or bookmark greater than 1 year maxiumum.")
+ LOGGER.warning("Start date or bookmark greater than 1 year maximum.")
 LOGGER.warning("Setting bookmark start to 1 year ago.")
 start_window = now_datetime - timedelta(days=delta_days)
 end_window = start_window + timedelta(days=days_interval)
- if end_window > now_datetime:
- end_window = now_datetime
+ end_window = min(end_window, now_datetime)
 else:
 start_window = strptime_to_utc(last_datetime)
 end_window = now_datetime
 diff_sec = (end_window - start_window).seconds
- # round-up difference to days
+ # Round-up difference to days
 days_interval = math.ceil(diff_sec / (3600 * 24))
 return start_window, end_window, days_interval
 def sync(self, state, catalog, config, start_date):
- # the sync method common to all the streams which internally call methods depending on different endpoints
+ """The sync method common to all the streams, which internally calls methods depending on different endpoints.
+
+ Args:
+ state (dict): State containing bookmarks of the streams if available.
+ catalog (singer.Catalog): Catalog object having schema and metadata of all the streams.
+ config (dict): Tap config containing the settings for this run.
+ start_date (str): The default value to use if no bookmark exists for an endpoint.
+
+ Returns:
+ int: Returns total number of records.
+ """ bookmark_field = next(iter(self.replication_keys), None) project_timezone = config.get("project_timezone", "UTC") days_interval = int(config.get("date_window_size", "30")) attribution_window = int(config.get("attribution_window", "5")) - #Update url if eu_residency is selected - if str(config.get('eu_residency')).lower() == "true": - if self.tap_stream_id == 'export': - self.url = 'https://data-eu.mixpanel.com/api/2.0' + # Update url if eu_residency is selected + if str(config.get("eu_residency")).lower() == "true": + if self.tap_stream_id == "export": + self.url = "https://data-eu.mixpanel.com/api/2.0" else: - self.url = 'https://eu.mixpanel.com/api/2.0' + self.url = "https://eu.mixpanel.com/api/2.0" # Get the latest bookmark for the stream and set the last_integer/datetime - last_datetime = self.get_bookmark( - state, self.tap_stream_id, start_date) + last_datetime = self.get_bookmark(state, self.tap_stream_id, start_date) max_bookmark_value = last_datetime self.write_schema(catalog, self.tap_stream_id) - # windowing: loop through date days_interval date windows from last_datetime to now_datetime + # Windowing: loop through date days_interval date windows from last_datetime to now_datetime tzone = pytz.timezone(project_timezone) now_datetime = datetime.now(tzone) - end_date = config.get('end_date') + end_date = config.get("end_date") if end_date: now_datetime = strptime_to_utc(end_date) start_window, end_window, days_interval = self.define_bookmark_filters( - days_interval, last_datetime, now_datetime, attribution_window) + days_interval, last_datetime, now_datetime, attribution_window + ) # LOOP order: Date Windows, Parent IDs, Page # Initialize counter endpoint_total = 0 # Total for ALL: parents, date windows, and pages @@ -321,7 +435,7 @@ def sync(self, state, catalog, config, start_date): total_records = 0 # Total records for all pages record_count = 0 # Total processed for page - params = self.params # adds in endpoint specific, sort, filter params + params = self.params # Adds in endpoint specific, sort, filter params if self.bookmark_query_field_from and self.bookmark_query_field_to: # Request dates need to be normalized to project timezone or else errors may occur @@ -335,13 +449,15 @@ def sync(self, state, catalog, config, start_date): params[self.bookmark_query_field_from] = from_date params[self.bookmark_query_field_to] = to_date - # funnels and cohorts have a parent endpoint with parent_data and parent_id_field + # Funnels and cohorts have a parent endpoint with parent_data and parent_id_field if self.parent_path and self.parent_id_field: # API request data - LOGGER.info("URL for Parent Stream %s: %s/%s", - self.tap_stream_id, - self.url, - self.parent_path) + LOGGER.info( + "URL for Parent Stream %s: %s/%s", + self.tap_stream_id, + self.url, + self.parent_path, + ) parent_data = self.client.request( method="GET", url=self.url, @@ -355,9 +471,11 @@ def sync(self, state, catalog, config, start_date): for parent_record in parent_data: parent_id = parent_record.get(self.parent_id_field) - LOGGER.info('START: Stream: %s, parent_id: %s', self.tap_stream_id, parent_id) + LOGGER.info( + "START: Stream: %s, parent_id: %s", self.tap_stream_id, parent_id + ) - # pagination: loop thru all pages of data using next (if not None) + # Pagination: loop thru all pages of data using next (if not None) page = 0 # First page is page=0, second page is page=1, ... 
offset = 0 limit = 250 # Default page_size @@ -366,42 +484,66 @@ def sync(self, state, catalog, config, start_date): total_records = 0 # Total records for all pages record_count = 0 # Total processed for page - session_id = 'initial' + session_id = "initial" if self.pagination: - params['page_size'] = limit + params["page_size"] = limit # Popped session_id and page number of last parents stream call. - params.pop('session_id', None) - params.pop('page', None) + params.pop("session_id", None) + params.pop("page", None) while offset <= total_records and session_id is not None: if self.pagination and page != 0: - params['session_id'] = session_id - params['page'] = page + params["session_id"] = session_id + params["page"] = page # querystring: Squash query params into string and replace [parent_id] - querystring = '&'.join(['%s=%s' % (key, value) for (key, value) in params.items()]) - querystring = querystring.replace('[parent_id]', str(parent_id)) + querystring = "&".join( + [f"{key}={value}" for (key, value) in params.items()] + ) + querystring = querystring.replace("[parent_id]", str(parent_id)) - full_url = '{}/{}{}'.format( - self.url, - self.path, - '?{}'.format(querystring) if querystring else '') + full_url = f"{self.url}/{self.path}{f'?{querystring}' if querystring else ''}" - LOGGER.info('URL for Stream %s: %s', self.tap_stream_id, full_url) + LOGGER.info("URL for Stream %s: %s", self.tap_stream_id, full_url) # API request data # data = {} - parent_total, date_total, offset, page, session_id, endpoint_total, max_bookmark_value, total_records = self.get_and_transform_records( - querystring, project_timezone, max_bookmark_value, catalog, last_datetime, endpoint_total, limit, total_records, parent_total, record_count, page, offset, parent_record, date_total) - # End stream != 'export' - LOGGER.info('FINISHED: Stream: %s, parent_id: %s', self.tap_stream_id, parent_id) - LOGGER.info('Total records for parent: %s', parent_total) + ( + parent_total, + date_total, + offset, + page, + session_id, + endpoint_total, + max_bookmark_value, + total_records, + ) = self.get_and_transform_records( + querystring, + project_timezone, + max_bookmark_value, + catalog, + last_datetime, + endpoint_total, + limit, + total_records, + parent_total, + record_count, + page, + offset, + parent_record, + date_total, + ) + # End stream != 'export' + LOGGER.info( + "FINISHED: Stream: %s, parent_id: %s", self.tap_stream_id, parent_id + ) + LOGGER.info("Total records for parent: %s", parent_total) # End parent record loop - LOGGER.info("FINISHED Sync for Stream: %s",self.tap_stream_id) + LOGGER.info("FINISHED Sync for Stream: %s", self.tap_stream_id) if self.bookmark_query_field_from: - LOGGER.info("Date window from: %s to %s",from_date, to_date) + LOGGER.info("Date window from: %s to %s", from_date, to_date) LOGGER.info("Total records for date window: %s", date_total) # Increment date window start_window = end_window @@ -424,6 +566,7 @@ class Annotations(MixPanel): List the annotations for a given date range. Docs: https://developer.mixpanel.com/reference/annotations """ + tap_stream_id = "annotations" key_properties = ["date"] path = "annotations" @@ -438,9 +581,11 @@ class Annotations(MixPanel): class CohortMembers(MixPanel): """ The list endpoint returns all of the cohorts in a given project. - The JSON formatted return contains the cohort name, id, count, description, creation date, and visibility for every cohort in the project. 
+ The JSON formatted return contains the cohort name, id, count, + description, creation date, and visibility for every cohort in the project. Docs: https://developer.mixpanel.com/reference/engage """ + tap_stream_id = "cohort_members" path = "engage" key_properties = ["cohort_id", "distinct_id"] @@ -457,9 +602,11 @@ class CohortMembers(MixPanel): class Cohorts(MixPanel): """ - Takes a JSON object with a single key called id whose value is the cohort ID. behaviors and filter_by_cohort are mutually exclusive. + Takes a JSON object with a single key called id whose value is the cohort ID. + behaviors and filter_by_cohort are mutually exclusive. Docs: https://developer.mixpanel.com/reference/cohorts """ + tap_stream_id = "cohorts" path = "cohorts/list" key_properties = ["id"] @@ -476,6 +623,7 @@ class Engage(MixPanel): Query user profile data and return list of users that fit specified parameters. Docs: https://developer.mixpanel.com/reference/engage """ + tap_stream_id = "engage" path = "engage" data_key = "results" @@ -495,6 +643,7 @@ class Export(MixPanel): complete with all event properties (including distinct_id) and the exact timestamp the event was fired. Docs: https://developer.mixpanel.com/reference/export """ + tap_stream_id = "export" path = "export" data_key = "results" @@ -506,36 +655,74 @@ class Export(MixPanel): params = {} def get_and_transform_records( - self, querystring, project_timezone, max_bookmark_value, catalog, last_datetime, endpoint_total, - limit, total_records, parent_total, record_count, page, offset, parent_record, date_total): + self, + querystring, + project_timezone, + max_bookmark_value, + catalog, + last_datetime, + endpoint_total, + limit, + total_records, + parent_total, + record_count, + page, + offset, + parent_record, + date_total, + ): """ Get the records using the client get request and transform it using transform_records - and return the max_bookmark_value + and return the max_bookmark_value. + + Args: + querystring (str): Params in URL query format to join with stream path + project_timezone (str): Time zone in which integer date times are stored. + max_bookmark_value (str): Maximum bookmark value among written records. + catalog (singer.Catalog): Catalog object having schema and metadata of all the streams. + last_datetime (str): Last datetime from which greater replication value records will be written. + endpoint_total (int): Total number of records written yet. + limit (int): Page size. + total_records (int): Total number of records available for the sync. + parent_total (int): # Total records for parent ID + record_count (int): Number of records per page written by tap. + page (int): Page count. + offset (int): Offset value of stream data for the pagination. + parent_record (dict): Record of parent stream. + date_total (int): Total records written for the date window. + + Raises: + Exception: Raises if any key-property is missing. 
+ + Returns: + tuple: Returns tuple of parent_total, date_total, offset, page, session_id, + endpoint_total, max_bookmark_value, total_records """ data = self.client.request_export( - method='GET', + method="GET", url=self.url, path=self.path, params=querystring, - endpoint=self.tap_stream_id) + endpoint=self.tap_stream_id, + ) # time_extracted: datetime when the data was extracted from the API time_extracted = utils.now() transformed_data = [] for record in data: - if record and str(record) != '': - # transform record and append to transformed_data array - transformed_record = transform_record(record, - self.tap_stream_id, - project_timezone) + if record and str(record) != "": + # Transform record and append to transformed_data array + transformed_record = transform_record( + record, self.tap_stream_id, project_timezone + ) transformed_data.append(transformed_record) # Check for missing keys for key in self.key_properties: val = transformed_record.get(key) if not val: - LOGGER.error('Error: Missing Key') - raise 'Missing Key' + LOGGER.error("Error: Missing Key") + raise Exception("Missing Key") if len(transformed_data) == limit: # Process full batch (limit = 250) records @@ -547,8 +734,13 @@ def get_and_transform_records( time_extracted=time_extracted, bookmark_field=next(iter(self.replication_keys), None), max_bookmark_value=max_bookmark_value, - last_datetime=last_datetime) - LOGGER.info('Stream %s, batch processed %s records', self.tap_stream_id, record_count) + last_datetime=last_datetime, + ) + LOGGER.info( + "Stream %s, batch processed %s records", + self.tap_stream_id, + record_count, + ) total_records = total_records + record_count parent_total = parent_total + record_count @@ -568,8 +760,13 @@ def get_and_transform_records( time_extracted=time_extracted, bookmark_field=next(iter(self.replication_keys), None), max_bookmark_value=max_bookmark_value, - last_datetime=last_datetime) - LOGGER.info('Stream %s, batch processed %s records', self.tap_stream_id, record_count) + last_datetime=last_datetime, + ) + LOGGER.info( + "Stream %s, batch processed %s records", + self.tap_stream_id, + record_count, + ) total_records = total_records + record_count parent_total = parent_total + record_count @@ -579,14 +776,25 @@ def get_and_transform_records( # Export does not provide pagination; session_id = None breaks out of loop. session_id = None - return parent_total, date_total, offset, page, session_id, endpoint_total, max_bookmark_value, total_records + return ( + parent_total, + date_total, + offset, + page, + session_id, + endpoint_total, + max_bookmark_value, + total_records, + ) class Funnels(MixPanel): """ - Get data for a funnel. funnel_id as a parameter to the API to get the funnel that you wish to get data for. + Get data for a funnel. + funnel_id as a parameter to the API to get the funnel that you wish to get data for. Docs: https://developer.mixpanel.com/reference/funnels """ + tap_stream_id = "funnels" path = "funnels" key_properties = ["funnel_id", "date"] diff --git a/tap_mixpanel/sync.py b/tap_mixpanel/sync.py index 613d703..cf0b218 100644 --- a/tap_mixpanel/sync.py +++ b/tap_mixpanel/sync.py @@ -6,14 +6,15 @@ def update_currently_syncing(state, stream_name): - """ - Currently syncing sets the stream currently being delivered in the state. + """Currently syncing sets the stream currently being delivered in the + state. + If the integration is interrupted, this state property is used to identify the starting point to continue from. 
Reference: https://github.com/singer-io/singer-python/blob/master/singer/bookmarks.py#L41-L46 """ - if (stream_name is None) and ('currently_syncing' in state): - del state['currently_syncing'] + if (stream_name is None) and ("currently_syncing" in state): + del state["currently_syncing"] else: singer.set_currently_syncing(state, stream_name) singer.write_state(state) @@ -25,26 +26,27 @@ def sync(client, config, catalog, state, start_date): last_stream = Previous currently synced stream, if the load was interrupted """ last_stream = singer.get_currently_syncing(state) - LOGGER.info('last/currently syncing stream: %s', last_stream) + LOGGER.info("last/currently syncing stream: %s", last_stream) selected_streams = [] for stream in catalog.get_selected_streams(state): selected_streams.append(stream.stream) - LOGGER.info('selected_streams: %s', selected_streams) + LOGGER.info("selected_streams: %s", selected_streams) if not selected_streams: return # Loop through selected_streams for stream_name in selected_streams: - LOGGER.info('START Syncing: %s', stream_name) + LOGGER.info("START Syncing: %s", stream_name) update_currently_syncing(state, stream_name) stream_obj = STREAMS[stream_name](client) endpoint_total = stream_obj.sync( - catalog=catalog, - state=state, - config=config, - start_date=start_date + catalog=catalog, state=state, config=config, start_date=start_date ) update_currently_syncing(state, None) - LOGGER.info('FINISHED Syncing: %s, Total endpoint records: %s', stream_name, endpoint_total) + LOGGER.info( + "FINISHED Syncing: %s, Total endpoint records: %s", + stream_name, + endpoint_total, + ) diff --git a/tap_mixpanel/transform.py b/tap_mixpanel/transform.py index 18d059f..042ec48 100644 --- a/tap_mixpanel/transform.py +++ b/tap_mixpanel/transform.py @@ -1,20 +1,31 @@ import datetime + import pytz import singer -from singer.utils import strftime from singer import Transformer +from singer.utils import strftime LOGGER = singer.get_logger() -# De-nest properties for engage and export endpoints + def denest_properties(record, properties_node): + """De-nest properties for engage and export endpoints. Write fields to + first level from `properties_node`. + + Args: + record (dict): Record to update. + properties_node (str): Nested object whose fields will be written at 1st level. + + Returns: + dict: Updated record + """ new_record = record properties = record.get(properties_node) if properties: for key, val in record[properties_node].items(): - if key[0:1] == '$': - new_key = 'mp_reserved_{}'.format(key[1:]) - # change this to regex + if key[0:1] == "$": + new_key = f"mp_reserved_{key[1:]}" + # Change this to regex else: new_key = key new_record[new_key] = val @@ -22,9 +33,17 @@ def denest_properties(record, properties_node): return new_record -# Time conversion from $time integer using project_timezone # Reference: https://help.mixpanel.com/hc/en-us/articles/115004547203-Manage-Timezones-for-Projects-in-Mixpanel#exporting-data-from-mixpanel def transform_event_times(record, project_timezone): + """Time conversion from $time integer using project_timezone. + + Args: + record (dict): Record to be transform. + project_timezone (str): Time zone in which integer date times are stored. + + Returns: + dict: Updated record. 
+ """ new_record = record timezone = pytz.timezone(project_timezone) @@ -37,7 +56,7 @@ def transform_event_times(record, project_timezone): beginning_datetime = pytz.utc.localize(naive_datetime).astimezone(timezone) # Get integer time - time_int = int(record.get('time')) + time_int = int(record.get("time")) # Create new_time_utc by adding seconds to beginning_datetime, normalizing, # and converting to string @@ -46,52 +65,96 @@ def transform_event_times(record, project_timezone): # 'normalize' accounts for daylight savings time new_time_utc_str = strftime(timezone.normalize(new_time).astimezone(pytz.utc)) - new_record['time'] = new_time_utc_str + new_record["time"] = new_time_utc_str return new_record + def transform_datetime(this_dttm): - with Transformer() as transformer: - new_dttm = transformer._transform_datetime(this_dttm) - return new_dttm + """Transform date_time string TO DATETIME object. + + Args: + this_dttm (str): Formatted date-time string + + Returns: + datetime: Datetime object passed string. + """ + with Transformer() as transformer: + new_dttm = transformer._transform_datetime(this_dttm) + return new_dttm -# Remove leading $ from engage $distinct_id def transform_engage(record): + """Remove leading $ from engage $distinct_id. + + Args: + record (dict): record to be update. + + Returns: + dict: New updated record. + """ new_record = record - distinct_id = record.get('$distinct_id') - new_record['distinct_id'] = distinct_id - new_record.pop('$distinct_id', None) + distinct_id = record.get("$distinct_id") + new_record["distinct_id"] = distinct_id + new_record.pop("$distinct_id", None) return new_record -# Funnels: combine parent record with each date record def transform_funnels(record, parent_record): + """Funnels: Combine parent record with each date record + + Args: + record (dict): Record to be transform. + parent_record (dict): Parent record. + + Returns: + dict: Updated record. + """ record.update(parent_record) return record -# Cohort Members: provide all distinct_id's for each cohort_id def transform_cohort_members(record, parent_record): - cohort_id = parent_record.get('id') - distinct_id = record.get('$distinct_id') + """Cohort Members: provide all distinct_id's for each cohort_id. + + Args: + record (dict): Record to be transform. + parent_record (dict): Parent stream record. + + Returns: + dict: Record with id fields. + """ + cohort_id = parent_record.get("id") + distinct_id = record.get("$distinct_id") new_record = {} - new_record['distinct_id'] = distinct_id - new_record['cohort_id'] = cohort_id + new_record["distinct_id"] = distinct_id + new_record["cohort_id"] = cohort_id return new_record # Run other transforms, as needed: denest_list_nodes, transform_conversation_parts def transform_record(record, stream_name, project_timezone, parent_record=None): - if stream_name == 'engage': + """Transform record and add fields at first level as required by stream. + + Args: + record (dict): Record to be transform. + stream_name (str): Stream that record belongs to. + project_timezone (str): Time zone in which integer date times are stored. + parent_record (dict, optional): Parent stream record if current stream is child. + Defaults to None. + + Returns: + dict: Transformed record. 
+ """ + if stream_name == "engage": trans_json = transform_engage(record) - new_record = denest_properties(trans_json, '$properties') - elif stream_name == 'export': - denested_json = denest_properties(record, 'properties') + new_record = denest_properties(trans_json, "$properties") + elif stream_name == "export": + denested_json = denest_properties(record, "properties") new_record = transform_event_times(denested_json, project_timezone) - elif stream_name == 'funnels': + elif stream_name == "funnels": new_record = transform_funnels(record, parent_record) - elif stream_name == 'cohort_members': + elif stream_name == "cohort_members": new_record = transform_cohort_members(record, parent_record) else: new_record = record diff --git a/tests/configuration/fixtures.py b/tests/configuration/fixtures.py index 1f53e9f..869f533 100644 --- a/tests/configuration/fixtures.py +++ b/tests/configuration/fixtures.py @@ -1,10 +1,14 @@ import pytest + from tap_mixpanel.client import MixpanelClient @pytest.fixture def mixpanel_client(): - # Support of request_timeout have been added. So, now MixpanelClient accept request_timeout parameter which is mandatory - mixpanel_client = MixpanelClient('API_SECRET', api_domain="mixpanel.com", request_timeout=1) # Pass extra request_timeout parameter + # Support of request_timeout have been added. + # So, now MixpanelClient accept request_timeout parameter which is mandatory + mixpanel_client = MixpanelClient( + "API_SECRET", api_domain="mixpanel.com", request_timeout=1 + ) # Pass extra request_timeout parameter mixpanel_client._MixpanelClient__verified = True return mixpanel_client diff --git a/tests/tap_tester/base.py b/tests/tap_tester/base.py index 5700364..b3e24c0 100644 --- a/tests/tap_tester/base.py +++ b/tests/tap_tester/base.py @@ -3,23 +3,19 @@ """ import os -import unittest from datetime import datetime as dt from datetime import timedelta import dateutil.parser import pytz - -from tap_tester import connections -from tap_tester import runner -from tap_tester import menagerie -from tap_tester.logger import LOGGER -from tap_tester.base_case import BaseCase +from tap_tester import LOGGER, BaseCase, connections, menagerie, runner class TestMixPanelBase(BaseCase): - """ Test the tap combined """ + """Test the tap combined.""" + START_DATE_FORMAT = "%Y-%m-%dT00:00:00Z" + BOOKMARK_COMPARISON_FORMAT = "%Y-%m-%dT%H:%M:%S.%fZ" REPLICATION_KEYS = "valid-replication-keys" PRIMARY_KEYS = "table-key-properties" FOREIGN_KEYS = "table-foreign-key-properties" @@ -28,49 +24,56 @@ class TestMixPanelBase(BaseCase): FULL_TABLE = "FULL_TABLE" API_LIMIT = 250 TYPE = "platform.mixpanel" + OBEYS_START_DATE = "obey-start-date" start_date = "" end_date = "" eu_residency = True def tap_name(self): - """The name of the tap""" + """The name of the tap.""" return "tap-mixpanel" def expected_metadata(self): - """The expected streams and metadata about the streams""" + """The expected streams and metadata about the streams.""" return { - 'export': { + "export": { self.PRIMARY_KEYS: set(), self.REPLICATION_METHOD: self.INCREMENTAL, - self.REPLICATION_KEYS: {'time'}, + self.REPLICATION_KEYS: {"time"}, + self.OBEYS_START_DATE: True, }, - 'engage': { + "engage": { self.PRIMARY_KEYS: {"distinct_id"}, self.REPLICATION_METHOD: self.FULL_TABLE, + self.OBEYS_START_DATE: False, }, - 'funnels': { - self.PRIMARY_KEYS: {'funnel_id', 'date'}, + "funnels": { + self.PRIMARY_KEYS: {"funnel_id", "date"}, self.REPLICATION_METHOD: self.INCREMENTAL, - self.REPLICATION_KEYS: {'datetime'}, + self.REPLICATION_KEYS: 
{"datetime"}, + self.OBEYS_START_DATE: True, }, - 'cohorts': { + "cohorts": { self.PRIMARY_KEYS: {"id"}, self.REPLICATION_METHOD: self.FULL_TABLE, + self.OBEYS_START_DATE: False, }, - 'cohort_members': { + "cohort_members": { self.PRIMARY_KEYS: {"cohort_id", "distinct_id"}, self.REPLICATION_METHOD: self.FULL_TABLE, + self.OBEYS_START_DATE: False, }, - 'revenue': { + "revenue": { self.PRIMARY_KEYS: {"date"}, self.REPLICATION_METHOD: self.INCREMENTAL, - self.REPLICATION_KEYS: {'datetime'}, + self.REPLICATION_KEYS: {"datetime"}, + self.OBEYS_START_DATE: True, }, - - 'annotations': { + "annotations": { self.PRIMARY_KEYS: {"date"}, - self.REPLICATION_METHOD: self.FULL_TABLE - } + self.REPLICATION_METHOD: self.FULL_TABLE, + self.OBEYS_START_DATE: False, + }, } def setUp(self): @@ -90,23 +93,25 @@ def setUp(self): BaseCase.setUp(self) def get_type(self): - """the expected url route ending""" + """The expected url route ending.""" return "platform.mixpanel" def get_properties(self, original: bool = True): """Configuration properties required for the tap.""" return_value = { - 'start_date': '2020-02-01T00:00:00Z', - 'end_date': '2020-03-01T00:00:00Z', - 'date_window_size': '30', - 'attribution_window': '5', - 'project_timezone': 'US/Pacific', - "eu_residency": 'false', - 'select_properties_by_default': 'false' + "start_date": "2020-02-01T00:00:00Z", + "end_date": "2020-03-01T00:00:00Z", + "date_window_size": "30", + "attribution_window": "5", + "project_timezone": "US/Pacific", + "eu_residency": "false", + "select_properties_by_default": "false", } if self.eu_residency: - return_value.update({"project_timezone": "Europe/Amsterdam", "eu_residency": 'true'}) + return_value.update( + {"project_timezone": "Europe/Amsterdam", "eu_residency": "true"} + ) if original: return return_value @@ -118,7 +123,10 @@ def get_start_date(self): return dt.strftime(dt.utcnow() - timedelta(days=30), self.START_DATE_FORMAT) def get_credentials(self): - """Authentication information for the test account. Api secret is expected as a property.""" + """ + Authentication information for the test account. + Api secret is expected as a property. + """ credentials_dict = {} if self.eu_residency: @@ -132,10 +140,10 @@ def get_credentials(self): return credentials_dict def expected_streams(self): - """A set of expected stream names""" + """A set of expected stream names.""" - # Skip `export` and `revenue` stream for EU recidency server as - # revenue stream endpoint returns 400 bad reuqest and + # Skip `export` and `revenue` stream for EU residency server as + # revenue stream endpoint returns 400 bad request and # export stream endpoint returns 200 terminated early response. # So, as per discussion decided that let the customer come with the issues # that these streams are not working. Skip the streams in the circleci. 
@@ -145,29 +153,35 @@ def expected_streams(self): return set(self.expected_metadata().keys()) def expected_pks(self): - """return a dictionary with key of table name and value as a set of primary key fields""" - return {table: properties.get(self.PRIMARY_KEYS, set()) - for table, properties - in self.expected_metadata().items()} + """Return a dictionary with key of table name and value as a set of primary key fields""" + return { + table: properties.get(self.PRIMARY_KEYS, set()) + for table, properties in self.expected_metadata().items() + } def expected_replication_keys(self): - """return a dictionary with key of table name and value as a set of replication key fields""" - return {table: properties.get(self.REPLICATION_KEYS, set()) - for table, properties - in self.expected_metadata().items()} + """Return a dictionary with key of table name and value as a set of replication key fields""" + return { + table: properties.get(self.REPLICATION_KEYS, set()) + for table, properties in self.expected_metadata().items() + } def expected_replication_method(self): - """return a dictionary with key of table name nd value of replication method""" - return {table: properties.get(self.REPLICATION_METHOD, None) - for table, properties - in self.expected_metadata().items()} + """Return a dictionary with key of table name and value of replication method""" + return { + table: properties.get(self.REPLICATION_METHOD, None) + for table, properties in self.expected_metadata().items() + } def expected_automatic_fields(self): - """return a dictionary with key of table name and value as a set of automatic key fields""" + """Return a dictionary with key of table name and value as a set of automatic key fields""" auto_fields = {} for k, v in self.expected_metadata().items(): - auto_fields[k] = v.get(self.PRIMARY_KEYS, set()) | v.get(self.REPLICATION_KEYS, set()) \ + auto_fields[k] = ( + v.get(self.PRIMARY_KEYS, set()) + | v.get(self.REPLICATION_KEYS, set()) | v.get(self.FOREIGN_KEYS, set()) + ) return auto_fields ######################### @@ -188,15 +202,18 @@ def run_and_verify_check_mode(self, conn_id): menagerie.verify_check_exit_status(self, exit_status, check_job_name) found_catalogs = menagerie.get_catalogs(conn_id) - self.assertGreater(len( - found_catalogs), 0, msg="unable to locate schemas for connection {}".format(conn_id)) + self.assertGreater( + len(found_catalogs), + 0, + msg=f"Unable to locate schemas for connection {conn_id}", + ) - found_catalog_names = set( - map(lambda c: c['stream_name'], found_catalogs)) + found_catalog_names = set(map(lambda c: c["stream_name"], found_catalogs)) subset = self.expected_streams().issubset(found_catalog_names) self.assertTrue( - subset, msg="Expected check streams are not subset of discovered catalog") + subset, msg="Expected check streams are not subset of discovered catalog" + ) LOGGER.info("discovered schemas are OK") return found_catalogs @@ -217,87 +234,97 @@ def run_and_verify_sync(self, conn_id): # Verify actual rows were synced sync_record_count = runner.examine_target_output_file( - self, conn_id, self.expected_streams(), self.expected_pks()) + self, conn_id, self.expected_streams(), self.expected_pks() + ) self.assertGreater( - sum(sync_record_count.values()), 0, - msg="failed to replicate any data: {}".format(sync_record_count) + sum(sync_record_count.values()), + 0, + msg=f"failed to replicate any data: {sync_record_count}", ) LOGGER.info(f"total replicated row count: {sum(sync_record_count.values())}") return sync_record_count - def 
perform_and_verify_table_and_field_selection(self, conn_id, test_catalogs, select_all_fields=True): + def perform_and_verify_table_and_field_selection( + self, conn_id, test_catalogs, select_all_fields=True + ): """ Perform table and field selection based off of the streams to select set and field selection parameters. - Verify this results in the expected streams selected and all or no - fields selected for those streams. + Verify this results in the expected streams selected and all or + no fields selected for those streams. """ # Select all available fields or select no fields from all testable streams - self.select_all_streams_and_fields( - conn_id, test_catalogs, select_all_fields) + self.select_all_streams_and_fields(conn_id, test_catalogs, select_all_fields) catalogs = menagerie.get_catalogs(conn_id) # Ensure our selection affects the catalog - expected_selected = [tc.get('stream_name') for tc in test_catalogs] + expected_selected = [cat.get("stream_name") for cat in test_catalogs] for cat in catalogs: - catalog_entry = menagerie.get_annotated_schema( - conn_id, cat['stream_id']) + catalog_entry = menagerie.get_annotated_schema(conn_id, cat["stream_id"]) # Verify all testable streams are selected - selected = catalog_entry.get('annotated-schema').get('selected') + selected = catalog_entry.get("annotated-schema").get("selected") LOGGER.info(f"Validating selection on {cat['stream_name']}: {selected}") - if cat['stream_name'] not in expected_selected: - self.assertFalse( - selected, msg="Stream selected, but not testable.") + if cat["stream_name"] not in expected_selected: + self.assertFalse(selected, msg="Stream selected, but not testable.") continue # Skip remaining assertions if we aren't selecting this stream self.assertTrue(selected, msg="Stream not selected.") if select_all_fields: # Verify all fields within each selected stream are selected - for field, field_props in catalog_entry.get('annotated-schema').get('properties').items(): - field_selected = field_props.get('selected') - LOGGER.info(f"\tValidating selection on {cat['stream_name']}.{field}: {field_selected}") + for field, field_props in ( + catalog_entry.get("annotated-schema").get("properties").items() + ): + field_selected = field_props.get("selected") + LOGGER.info( + f"\tValidating selection on {cat['stream_name']}.{field}: {field_selected}" + ) self.assertTrue(field_selected, msg="Field not selected.") else: # Verify only automatic fields are selected expected_automatic_fields = self.expected_automatic_fields().get( - cat['stream_name']) + cat["stream_name"] + ) selected_fields = self.get_selected_fields_from_metadata( - catalog_entry['metadata']) + catalog_entry["metadata"] + ) self.assertEqual(expected_automatic_fields, selected_fields) def get_selected_fields_from_metadata(self, metadata): selected_fields = set() for field in metadata: - is_field_metadata = len(field['breadcrumb']) > 1 + is_field_metadata = len(field["breadcrumb"]) > 1 inclusion_automatic_or_selected = ( - field['metadata']['selected'] is True or - field['metadata']['inclusion'] == 'automatic' + field["metadata"]["selected"] is True or + field["metadata"]["inclusion"] == "automatic" ) if is_field_metadata and inclusion_automatic_or_selected: - selected_fields.add(field['breadcrumb'][1]) + selected_fields.add(field["breadcrumb"][1]) return selected_fields - def select_all_streams_and_fields(self, conn_id, catalogs, select_all_fields: bool = True): - """Select all streams and all fields within streams""" + def select_all_streams_and_fields( + self, 
conn_id, catalogs, select_all_fields: bool = True + ): + """Select all streams and all fields within streams.""" for catalog in catalogs: - schema = menagerie.get_annotated_schema( - conn_id, catalog['stream_id']) + schema = menagerie.get_annotated_schema(conn_id, catalog["stream_id"]) non_selected_properties = [] if not select_all_fields: # get a list of all properties so that none are selected - non_selected_properties = schema.get('annotated-schema', {}).get( - 'properties', {}).keys() + non_selected_properties = ( + schema.get("annotated-schema", {}).get("properties", {}).keys() + ) connections.select_catalog_and_fields_via_metadata( - conn_id, catalog, schema, [], non_selected_properties) + conn_id, catalog, schema, [], non_selected_properties + ) def parse_date(self, date_value): """ @@ -308,7 +335,7 @@ def parse_date(self, date_value): "%Y-%m-%dT%H:%M:%SZ", "%Y-%m-%dT%H:%M:%S.%f+00:00", "%Y-%m-%dT%H:%M:%S+00:00", - "%Y-%m-%d" + "%Y-%m-%d", } for date_format in date_formats: try: @@ -318,22 +345,31 @@ def parse_date(self, date_value): continue raise NotImplementedError( - "Tests do not account for dates of this format: {}".format(date_value)) + f"Tests do not account for dates of this format: {date_value}" + ) def calculated_states_by_stream(self, current_state): - timedelta_by_stream = {stream: [1,0,0] # {stream_name: [days, hours, minutes], ...} - for stream in self.expected_streams()} + timedelta_by_stream = { + stream: [1, 0, 0] # {stream_name: [days, hours, minutes], ...} + for stream in self.expected_streams() + } - stream_to_calculated_state = {stream: "" for stream in current_state['bookmarks'].keys()} - for stream, state in current_state['bookmarks'].items(): + stream_to_calculated_state = { + stream: "" for stream in current_state["bookmarks"].keys() + } + for stream, state in current_state["bookmarks"].items(): state_as_datetime = dateutil.parser.parse(state) days, hours, minutes = timedelta_by_stream[stream] - calculated_state_as_datetime = state_as_datetime - timedelta(days=days, hours=hours, minutes=minutes) + calculated_state_as_datetime = state_as_datetime - timedelta( + days=days, hours=hours, minutes=minutes + ) - state_format = '%Y-%m-%dT%H:%M:%S-00:00' - calculated_state_formatted = dt.strftime(calculated_state_as_datetime, state_format) + state_format = "%Y-%m-%dT%H:%M:%S-00:00" + calculated_state_formatted = dt.strftime( + calculated_state_as_datetime, state_format + ) stream_to_calculated_state[stream] = calculated_state_formatted @@ -347,7 +383,7 @@ def convert_state_to_utc(self, date_str): """ Convert a saved bookmark value of the form '2020-08-25T13:17:36-07:00' to a string formatted utc datetime, - in order to compare aginast json formatted datetime values + in order to compare against json formatted datetime values. 
""" date_object = dateutil.parser.parse(date_str) date_object_utc = date_object.astimezone(tz=pytz.UTC) @@ -362,14 +398,20 @@ def timedelta_formatted(self, dtime, days=0): except ValueError: try: - date_stripped = dt.strptime( - dtime, self.BOOKMARK_COMPARISON_FORMAT) + date_stripped = dt.strptime(dtime, self.BOOKMARK_COMPARISON_FORMAT) return_date = date_stripped + timedelta(days=days) return dt.strftime(return_date, self.BOOKMARK_COMPARISON_FORMAT) except ValueError: - return Exception("Datetime object is not of the format: {}".format(self.START_DATE_FORMAT)) + return Exception( + "Datetime object is not of the format: {}".format( + self.START_DATE_FORMAT + ) + ) def is_incremental(self, stream): - return self.expected_metadata().get(stream).get(self.REPLICATION_METHOD) == self.INCREMENTAL + return ( + self.expected_metadata().get(stream).get(self.REPLICATION_METHOD) + == self.INCREMENTAL + ) diff --git a/tests/tap_tester/test_all_fields_pagination.py b/tests/tap_tester/test_all_fields_pagination.py deleted file mode 100644 index 31ffe79..0000000 --- a/tests/tap_tester/test_all_fields_pagination.py +++ /dev/null @@ -1,163 +0,0 @@ -from math import ceil - -import tap_tester.connections as connections -import tap_tester.runner as runner -import tap_tester.menagerie as menagerie -from tap_tester.logger import LOGGER - -from base import TestMixPanelBase - - -class MixPanelPaginationAllFieldsTest(TestMixPanelBase): - - @staticmethod - def name(): - return "mixpanel_pagination_all_fields_test" - - def pagination_test_run(self): - """ - All Fields Test - • and that when all fields are selected more than the automatic fields are replicated. - • Verify no unexpected streams were replicated - • Verify that more than just the automatic fields are replicated for each stream. - • verify all fields for each stream are replicated - • verify that the automatic fields are sent to the target - - - Pagination Test - • Verify that for each stream you can get multiple pages of data - • Verify no duplicate pages are replicated - • Verify no unexpected streams were replicated - - PREREQUISITE - For EACH stream add enough data that you surpass the limit of a single - fetch of data. For instance if you have a limit of 250 records ensure - that 251 (or more) records have been posted for that stream. 
- """ - - # Only following below 2 streams support pagination - streams_to_test_all_fields = self.expected_streams() - streams_to_test_pagination = {'engage', 'cohort_members'} - - expected_automatic_fields = self.expected_automatic_fields() - conn_id = connections.ensure_connection(self) - - found_catalogs = self.run_and_verify_check_mode(conn_id) - - # table and field selection - test_catalogs_all_fields = [catalog for catalog in found_catalogs - if catalog.get('tap_stream_id') in streams_to_test_all_fields] - - self.perform_and_verify_table_and_field_selection( - conn_id, test_catalogs_all_fields) - - # grab metadata after performing table-and-field selection to set expectations - # used for asserting all fields are replicated - stream_to_all_catalog_fields = dict() - for catalog in test_catalogs_all_fields: - stream_id, stream_name = catalog['stream_id'], catalog['stream_name'] - catalog_entry = menagerie.get_annotated_schema(conn_id, stream_id) - fields_from_field_level_md = [md_entry['breadcrumb'][1] - for md_entry in catalog_entry['metadata'] - if md_entry['breadcrumb'] != []] - stream_to_all_catalog_fields[stream_name] = set( - fields_from_field_level_md) - - record_count_by_stream = self.run_and_verify_sync(conn_id) - - actual_fields_by_stream = runner.examine_target_output_for_fields() - - synced_records = runner.get_records_from_target_output() - - # Verify no unexpected streams were replicated - synced_stream_names = set(synced_records.keys()) - self.assertSetEqual(streams_to_test_all_fields, synced_stream_names) - - # All Fields Test - for stream in streams_to_test_all_fields: - with self.subTest(logging="Primary Functional Test", stream=stream): - - # expected values - expected_all_keys = stream_to_all_catalog_fields[stream] - expected_automatic_keys = expected_automatic_fields.get( - stream, set()) - - # collect actual values - messages = synced_records.get(stream) - actual_all_keys = set() - for message in messages['messages']: - if message['action'] == 'upsert': - actual_all_keys.update(set(message['data'].keys())) - - # verify that the automatic fields are sent to the target - self.assertTrue( - actual_fields_by_stream.get(stream, set()).issuperset( - expected_automatic_keys), - msg="The fields sent to the target don't include all automatic fields") - - # Verify that more than just the automatic fields are replicated for each stream. - if stream != "cohort_members": # cohort_member has just 2 key and both are automatic - self.assertGreater(len(expected_all_keys), - len(expected_automatic_keys)) - - self.assertTrue(expected_automatic_keys.issubset( - expected_all_keys), msg=f'{expected_automatic_keys-expected_all_keys} is not in "expected_all_keys"') - - # As we can't find the below fields in the docs and also - # it won't be generated by mixpanel APIs now so expected. - if stream == "export": - expected_all_keys = expected_all_keys - {'labels', 'sampling_factor', 'dataset', 'mp_reserved_duration_s', 'mp_reserved_origin_end', - 'mp_reserved_origin_start', 'mp_reserved_event_count'} - - # verify all fields for each stream are replicated - if not stream == "engage": #Skip engage as it return records in random manner with dynamic fields. 
- self.assertSetEqual(expected_all_keys, actual_all_keys) - - # Pagination Test - for stream in streams_to_test_pagination: - with self.subTest(stream=stream): - - # expected values - expected_primary_keys = self.expected_pks()[stream] - - # collect actual values - messages = synced_records.get(stream) - primary_keys_list = [tuple([message['data'][expected_pk] for expected_pk in expected_primary_keys]) - for message in messages['messages'] if message['action'] == 'upsert'] - - # verify that we can paginate with all fields selected - record_count_sync = record_count_by_stream.get(stream, 0) - self.assertGreater(record_count_sync, self.API_LIMIT, - msg="The number of records is not over the stream max limit") - - - # Chunk the replicated records (just primary keys) into expected pages - pages = [] - page_count = ceil(len(primary_keys_list) / self.API_LIMIT) - page_size = self.API_LIMIT - for page_index in range(page_count): - page_start = page_index * page_size - page_end = (page_index + 1) * page_size - pages.append(set(primary_keys_list[page_start:page_end])) - - # Verify by primary keys that data is unique for each page - for current_index, current_page in enumerate(pages): - with self.subTest(current_page_primary_keys=current_page): - - for other_index, other_page in enumerate(pages): - if current_index == other_index: - continue # don't compare the page to itself - - self.assertTrue( - current_page.isdisjoint(other_page), msg=f'other_page_primary_keys={other_page}' - ) - - def test_run(self): - # Pagination test for standard server - self.eu_residency = False - self.pagination_test_run() - - - # Pagination test for EU residency server - self.eu_residency = True - self.pagination_test_run() diff --git a/tests/tap_tester/test_discovery.py b/tests/tap_tester/test_discovery.py deleted file mode 100644 index 5442212..0000000 --- a/tests/tap_tester/test_discovery.py +++ /dev/null @@ -1,142 +0,0 @@ -import re -from tap_tester import menagerie, connections -from tap_tester.logger import LOGGER - -from base import TestMixPanelBase - -class MixPanelDiscoverTest(TestMixPanelBase): - """ - Testing that discovery creates the appropriate catalog with valid metadata. - • Verify number of actual streams discovered match expected - • Verify the stream names discovered were what we expect - • Verify stream names follow naming convention - streams should only have lowercase alphas and underscores - • verify there is only 1 top level breadcrumb - • verify replication key(s) - • verify primary key(s) - • verify that if there is a replication key we are doing INCREMENTAL otherwise FULL - • verify the actual replication matches our expected replication method - • verify that primary, replication and foreign keys - are given the inclusion of automatic. - • verify that all other fields have inclusion of available metadata. 
- """ - - @staticmethod - def name(): - return "mix_panel_discover_test" - - def discovery_test_run(self): - - region = "EU" if self.eu_residency else "Standard" - LOGGER.info(f"Testing against {region} account.") - - self.assertion_logging_enabled = True - - streams_to_test = self.expected_streams() - - conn_id = connections.ensure_connection(self, payload_hook=None) - - # Verify that there are catalogs found - found_catalogs = self.run_and_verify_check_mode(conn_id) - - # Verify stream names follow naming convention - # streams should only have lowercase alphas and underscores - found_catalog_names = {c['tap_stream_id'] for c in found_catalogs} - self.assertTrue(all([re.fullmatch(r"[a-z_]+", name) for name in found_catalog_names]), - logging="asserting all streams defined in catalog follow the naming convention '[a-z_]+'") - - for stream in streams_to_test: - with self.subTest(stream=stream): - - # Verify the caatalog is found for a given stream - catalog = next(iter([catalog for catalog in found_catalogs - if catalog["stream_name"] == stream])) - self.assertIsNotNone(catalog, logging="asserting entry is present in catalog") - - # collecting expected values - expected_primary_keys = self.expected_pks()[stream] - expected_replication_keys = self.expected_replication_keys()[ - stream] - expected_automatic_fields = self.expected_automatic_fields().get(stream) - expected_replication_method = self.expected_replication_method()[ - stream] - - # collecting actual values... - schema_and_metadata = menagerie.get_annotated_schema( - conn_id, catalog['stream_id']) - metadata = schema_and_metadata["metadata"] - stream_properties = [ - item for item in metadata if item.get("breadcrumb") == []] - actual_primary_keys = set( - stream_properties[0].get( - "metadata", {self.PRIMARY_KEYS: []}).get(self.PRIMARY_KEYS, []) - ) - actual_replication_keys = set( - stream_properties[0].get( - "metadata", {self.REPLICATION_KEYS: []}).get(self.REPLICATION_KEYS, []) - ) - actual_replication_method = stream_properties[0].get( - "metadata", {self.REPLICATION_METHOD: None}).get(self.REPLICATION_METHOD) - actual_automatic_fields = set( - item.get("breadcrumb", ["properties", None])[1] for item in metadata - if item.get("metadata").get("inclusion") == "automatic" - ) - - ########################################################################## - # metadata assertions - ########################################################################## - - # verify there is only 1 top level breadcrumb in metadata - self.assertEqual(len(stream_properties), 1, - logging='asserting there is only 1 top level breadcrumb in metadata') - - # verify that if there is a replication key we are doing INCREMENTAL otherwise FULL - if actual_replication_keys: - self.assertEqual( - actual_replication_method, self.INCREMENTAL, - logging=f"asserting replication method is {self.INCREMENTAL} when replication keys are defined" - ) - else: - self.assertEqual( - actual_replication_method, self.FULL_TABLE, - logging=f"asserting replication method is {self.FULL_TABLE} when replication keys are not defined" - ) - - # verify the actual replication matches our expected replication method - self.assertEqual(expected_replication_method, actual_replication_method, - logging=f"asserting replication method is {expected_replication_method}") - - # verify replication key(s) - self.assertEqual(expected_replication_keys, actual_replication_keys, - logging=f"asserting replication keys are {expected_replication_keys}") - - - # verify primary key(s) match expectations 
- self.assertSetEqual(expected_primary_keys, actual_primary_keys, - logging=f"asserting primary keys are {expected_primary_keys}") - - # verify that primary keys and replication keys - # are given the inclusion of automatic in metadata. - self.assertSetEqual(expected_automatic_fields, actual_automatic_fields, - logging=f"asserting primary and replication keys {expected_automatic_fields} are automatic") - - # verify that all other fields have inclusion of available - # This assumes there are no unsupported fields for SaaS sources - self.assertTrue( - all({item.get("metadata").get("inclusion") == "available" - for item in metadata - if item.get("breadcrumb", []) != [] - and item.get("breadcrumb", ["properties", None])[1] - not in actual_automatic_fields}), - logging=f"asserting non-key-property fields are available for field selection") - - - def test_standard_discovery(self): - """Discovery test for standard server""" - self.eu_residency = False - self.discovery_test_run() - - def test_eu_discovery(self): - """Discovery test for EU recidency server""" - self.eu_residency = True - self.discovery_test_run() diff --git a/tests/tap_tester/test_mixpanel_all_fields.py b/tests/tap_tester/test_mixpanel_all_fields.py new file mode 100644 index 0000000..84f0ee2 --- /dev/null +++ b/tests/tap_tester/test_mixpanel_all_fields.py @@ -0,0 +1,125 @@ +from tap_tester import runner, connections, menagerie + +from base import TestMixPanelBase + + +class MixPanelAllFieldsTest(TestMixPanelBase): + + @staticmethod + def name(): + return "tap_tester_mixpanel_all_fields_test" + + def all_fields_test(self): + """ + All Fields Test. + + • Verify that when all fields are selected more than the automatic fields are replicated. + • Verify no unexpected streams were replicated + • Verify that more than just the automatic fields are replicated for each stream. 
+        • Verify all fields for each stream are replicated
+        • Verify that the automatic fields are sent to the target
+        """
+
+        # All streams are tested with all fields selected
+        expected_streams = self.expected_streams()
+
+        expected_automatic_fields = self.expected_automatic_fields()
+        conn_id = connections.ensure_connection(self)
+
+        found_catalogs = self.run_and_verify_check_mode(conn_id)
+
+        # Table and field selection
+        test_catalogs_all_fields = [
+            catalog
+            for catalog in found_catalogs
+            if catalog.get("tap_stream_id") in expected_streams
+        ]
+
+        self.perform_and_verify_table_and_field_selection(
+            conn_id, test_catalogs_all_fields
+        )
+
+        # Grab metadata after performing table-and-field selection to set expectations
+        # used for asserting all fields are replicated
+        stream_to_all_catalog_fields = dict()
+        for catalog in test_catalogs_all_fields:
+            stream_id, stream_name = catalog["stream_id"], catalog["stream_name"]
+            catalog_entry = menagerie.get_annotated_schema(conn_id, stream_id)
+            fields_from_field_level_md = [
+                md_entry["breadcrumb"][1]
+                for md_entry in catalog_entry["metadata"]
+                if md_entry["breadcrumb"] != []
+            ]
+            stream_to_all_catalog_fields[stream_name] = set(fields_from_field_level_md)
+
+        self.run_and_verify_sync(conn_id)
+
+        actual_fields_by_stream = runner.examine_target_output_for_fields()
+
+        synced_records = runner.get_records_from_target_output()
+
+        # Verify no unexpected streams were replicated
+        synced_stream_names = set(synced_records.keys())
+        self.assertSetEqual(expected_streams, synced_stream_names)
+
+        # All Fields Test
+        for stream in expected_streams:
+            with self.subTest(logging="Primary Functional Test", stream=stream):
+
+                # Expected values
+                expected_all_keys = stream_to_all_catalog_fields[stream]
+                expected_automatic_keys = expected_automatic_fields.get(stream, set())
+
+                # Collect actual values
+                messages = synced_records.get(stream)
+                actual_all_keys = set()
+                for message in messages["messages"]:
+                    if message["action"] == "upsert":
+                        actual_all_keys.update(set(message["data"].keys()))
+
+                # Verify that the automatic fields are sent to the target
+                self.assertTrue(
+                    actual_fields_by_stream.get(stream, set()).issuperset(
+                        expected_automatic_keys
+                    ),
+                    msg="The fields sent to the target don't include all automatic fields",
+                )
+
+                # Verify that more than just the automatic fields are replicated for each stream.
+                # 'cohort_members' has just 2 keys and both are automatic
+                if stream != "cohort_members":
+                    self.assertGreater(
+                        len(expected_all_keys), len(expected_automatic_keys)
+                    )
+
+                self.assertTrue(
+                    expected_automatic_keys.issubset(expected_all_keys),
+                    msg=f'{expected_automatic_keys-expected_all_keys} is not in "expected_all_keys"',
+                )
+
+                # The fields below are not documented and are no longer returned
+                # by the Mixpanel APIs, so they are excluded from the expectation.
+                if stream == "export":
+                    expected_all_keys = expected_all_keys - {
+                        "labels",
+                        "sampling_factor",
+                        "dataset",
+                        "mp_reserved_duration_s",
+                        "mp_reserved_origin_end",
+                        "mp_reserved_origin_start",
+                        "mp_reserved_event_count",
+                    }
+
+                # Verify all fields for each stream are replicated.
+                # Skip engage as it returns records in random order with dynamic fields.
+ if not stream == "engage": + self.assertSetEqual(expected_all_keys, actual_all_keys) + + def test_run(self): + # Pagination test for standard server + self.eu_residency = False + self.all_fields_test() + + # Pagination test for EU residency server + self.eu_residency = True + self.all_fields_test() diff --git a/tests/tap_tester/test_automatic_fields.py b/tests/tap_tester/test_mixpanel_automatic_fields.py similarity index 67% rename from tests/tap_tester/test_automatic_fields.py rename to tests/tap_tester/test_mixpanel_automatic_fields.py index 7017942..c04195b 100644 --- a/tests/tap_tester/test_automatic_fields.py +++ b/tests/tap_tester/test_mixpanel_automatic_fields.py @@ -1,32 +1,38 @@ -import tap_tester.connections as connections -import tap_tester.runner as runner +from tap_tester import connections, runner + from base import TestMixPanelBase class MixPanelAutomaticFieldsTest(TestMixPanelBase): """ - Ensure running the tap with all streams selected and all fields deselected results in the replication of just the + Ensure running the tap with all streams selected and all fields + deselected results in the replication of just the primary keys and replication keys (automatic fields). """ @staticmethod def name(): - return "mix_panel_automatic_fields_test" + return "tap_tester_mixpanel_automatic_fields_test" def automatic_fields_test_run(self): """ - Verify that for each stream you can get enough data - when no fields are selected and only the automatic fields are replicated. + • Verify we can deselect all fields except when inclusion=automatic, + which is handled by base.py methods + • Verify that only the automatic fields are sent to the target. + • Verify that all replicated records have unique primary key values. """ - streams_to_test = self.expected_streams() + expected_streams = self.expected_streams() conn_id = connections.ensure_connection(self) found_catalogs = self.run_and_verify_check_mode(conn_id) - # table and field selection - test_catalogs_automatic_fields = [catalog for catalog in found_catalogs - if catalog.get('tap_stream_id') in streams_to_test] + # Table and field selection + test_catalogs_automatic_fields = [ + catalog + for catalog in found_catalogs + if catalog.get("tap_stream_id") in expected_streams + ] self.perform_and_verify_table_and_field_selection( conn_id, test_catalogs_automatic_fields, select_all_fields=False) @@ -34,13 +40,13 @@ def automatic_fields_test_run(self): record_count_by_stream = self.run_and_verify_sync(conn_id) synced_records = runner.get_records_from_target_output() - for stream in streams_to_test: + for stream in expected_streams: with self.subTest(stream=stream): - # expected values + # Expected values expected_keys = self.expected_automatic_fields().get(stream) - # collect actual values + # Collect actual values data = synced_records.get(stream, {}) record_messages_keys = [set(row['data'].keys()) for row in data.get('messages', [])] @@ -55,13 +61,12 @@ def automatic_fields_test_run(self): for actual_keys in record_messages_keys: self.assertSetEqual(expected_keys, actual_keys) - def test_standard_auto_fields(self): """Automatic fields test for standard server""" self.eu_residency = False self.automatic_fields_test_run() def test_eu_auto_fields(self): - """Automatic fields test for EU recidency server""" + """Automatic fields test for EU residency server""" self.eu_residency = True self.automatic_fields_test_run() diff --git a/tests/tap_tester/test_bookmark.py b/tests/tap_tester/test_mixpanel_bookmark.py similarity index 69% rename from 
tests/tap_tester/test_bookmark.py rename to tests/tap_tester/test_mixpanel_bookmark.py index f6d0b10..83989eb 100644 --- a/tests/tap_tester/test_bookmark.py +++ b/tests/tap_tester/test_mixpanel_bookmark.py @@ -1,7 +1,7 @@ -import tap_tester.connections as connections -import tap_tester.runner as runner +from tap_tester import menagerie, connections, runner + from base import TestMixPanelBase -from tap_tester import menagerie + class MixPanelBookMarkTest(TestMixPanelBase): @@ -9,7 +9,7 @@ class MixPanelBookMarkTest(TestMixPanelBase): @staticmethod def name(): - return "mix_panel_bookmark_test" + return "tap_tester_mixpanel_bookmark_test" def bookmark_test_run(self): """ @@ -40,9 +40,12 @@ def bookmark_test_run(self): # Run in check mode found_catalogs = self.run_and_verify_check_mode(conn_id) - # table and field selection - catalog_entries = [catalog for catalog in found_catalogs - if catalog.get('tap_stream_id') in expected_streams] + # Table and field selection + catalog_entries = [ + catalog + for catalog in found_catalogs + if catalog.get("tap_stream_id") in expected_streams + ] self.perform_and_verify_table_and_field_selection( conn_id, catalog_entries) @@ -56,11 +59,10 @@ def bookmark_test_run(self): # Update State Between Syncs ########################################################################## - new_states = {'bookmarks': dict()} - simulated_states = self.calculated_states_by_stream( - first_sync_bookmarks) + new_states = {"bookmarks": dict()} + simulated_states = self.calculated_states_by_stream(first_sync_bookmarks) for stream, new_state in simulated_states.items(): - new_states['bookmarks'][stream] = new_state + new_states["bookmarks"][stream] = new_state menagerie.set_state(conn_id, new_states) ########################################################################## @@ -78,38 +80,43 @@ def bookmark_test_run(self): for stream in expected_streams: with self.subTest(stream=stream): - # expected values + # Expected values expected_replication_method = expected_replication_methods[stream] - # collect information for assertions from syncs 1 & 2 base on expected values + # Collect information for assertions from syncs 1 & 2 base on expected values first_sync_count = first_sync_record_count.get(stream, 0) second_sync_count = second_sync_record_count.get(stream, 0) - first_sync_messages = [record.get('data') for record in - first_sync_records.get( - stream, {}).get('messages', []) - if record.get('action') == 'upsert'] - second_sync_messages = [record.get('data') for record in - second_sync_records.get( - stream, {}).get('messages', []) - if record.get('action') == 'upsert'] + first_sync_messages = [ + record.get("data") + for record in first_sync_records.get(stream, {}).get("messages", []) + if record.get("action") == "upsert" + ] + second_sync_messages = [ + record.get("data") + for record in second_sync_records.get(stream, {}).get( + "messages", [] + ) + if record.get("action") == "upsert" + ] first_bookmark_value = first_sync_bookmarks.get( - 'bookmarks', {stream: None}).get(stream) + "bookmarks", {stream: None} + ).get(stream) second_bookmark_value = second_sync_bookmarks.get( - 'bookmarks', {stream: None}).get(stream) + "bookmarks", {stream: None} + ).get(stream) if expected_replication_method == self.INCREMENTAL: - # collect information specific to incremental streams from syncs 1 & 2 - replication_key = next( - iter(expected_replication_keys[stream])) + # Collect information specific to incremental streams from syncs 1 & 2 + replication_key = 
next(iter(expected_replication_keys[stream])) first_bookmark_value_utc = self.convert_state_to_utc( first_bookmark_value) second_bookmark_value_utc = self.convert_state_to_utc( second_bookmark_value) - simulated_bookmark = new_states['bookmarks'][stream] + simulated_bookmark = new_states["bookmarks"][stream] # Verify the first sync sets a bookmark of the expected form self.assertIsNotNone(first_bookmark_value) @@ -119,35 +126,42 @@ def bookmark_test_run(self): # Verify the second sync bookmark is Equal to the first sync bookmark # assumes no changes to data during test - self.assertEqual(second_bookmark_value, - first_bookmark_value) + self.assertEqual(second_bookmark_value, first_bookmark_value) for record in first_sync_messages: # Verify the first sync bookmark value is the max replication key value for a given stream replication_key_value = record.get(replication_key) self.assertLessEqual( - replication_key_value, first_bookmark_value_utc, - msg="First sync bookmark was set incorrectly, a record with a greater replication-key value was synced." + replication_key_value, + first_bookmark_value_utc, + msg="First sync bookmark was set incorrectly," + "a record with a greater replication-key value was synced.", ) for record in second_sync_messages: # Verify the second sync replication key value is Greater or Equal to the first sync bookmark replication_key_value = record.get(replication_key) - self.assertGreaterEqual(replication_key_value, simulated_bookmark, - msg="Second sync records do not repect the previous bookmark.") + self.assertGreaterEqual( + replication_key_value, + simulated_bookmark, + msg="Second sync records do not respect the previous bookmark.", + ) # Verify the second sync bookmark value is the max replication key value for a given stream self.assertLessEqual( - replication_key_value, second_bookmark_value_utc, - msg="Second sync bookmark was set incorrectly, a record with a greater replication-key value was synced." 
+ replication_key_value, + second_bookmark_value_utc, + msg="Second sync bookmark was set incorrectly," + " a record with a greater replication-key value was synced.", ) - # verify that you get less data the 2nd time around + # Verify that you get less data the 2nd time around self.assertLess( second_sync_count, first_sync_count, - msg="second syc didn't have less records, bookmark usage not verified") + msg="Second syc didn't have less records, bookmark usage not verified", + ) elif expected_replication_method == self.FULL_TABLE: @@ -162,19 +176,23 @@ def bookmark_test_run(self): raise NotImplementedError( "INVALID EXPECTATIONS\t\tSTREAM: {} REPLICATION_METHOD: {}".format( - stream, expected_replication_method) + stream, expected_replication_method + ) ) # Verify at least 1 record was replicated in the second sync self.assertGreater( - second_sync_count, 0, msg="We are not fully testing bookmarking for {}".format(stream)) + second_sync_count, + 0, + msg=f"We are not fully testing bookmarking for {stream}", + ) def test_standard_bookmarks(self): - """Bookmark test for standard server""" + """Bookmark test for standard server.""" self.eu_residency = False self.bookmark_test_run() def test_eu_bookmarks(self): - """Bookmark test for EU recidency server""" + """Bookmark test for EU residency server.""" self.eu_residency = True self.bookmark_test_run() diff --git a/tests/tap_tester/test_mixpanel_discovery.py b/tests/tap_tester/test_mixpanel_discovery.py new file mode 100644 index 0000000..b578dac --- /dev/null +++ b/tests/tap_tester/test_mixpanel_discovery.py @@ -0,0 +1,190 @@ +import re +from tap_tester import menagerie, connections, LOGGER + +from base import TestMixPanelBase + +class MixPanelDiscoverTest(TestMixPanelBase): + """ + Testing that discovery creates the appropriate catalog with valid metadata. + • Verify number of actual streams discovered match expected + • Verify the stream names discovered were what we expect + • Verify stream names follow naming convention + streams should only have lowercase alphas and underscores + • Verify there is only 1 top level breadcrumb + • Verify replication key(s) + • Verify primary key(s) + • Verify that if there is a replication key we are doing INCREMENTAL otherwise FULL + • Verify the actual replication matches our expected replication method + • Verify that primary, replication and foreign keys + are given the inclusion of automatic. + • Verify that all other fields have inclusion of available metadata. 
+ """ + + @staticmethod + def name(): + return "tap_tester_mixpanel_discover_test" + + def discovery_test_run(self): + + region = "EU" if self.eu_residency else "Standard" + LOGGER.info(f"Testing against {region} account.") + + self.assertion_logging_enabled = True + + streams_to_test = self.expected_streams() + + conn_id = connections.ensure_connection(self, payload_hook=None) + + # Verify that there are catalogs found + found_catalogs = self.run_and_verify_check_mode(conn_id) + + # Verify stream names follow naming convention + # streams should only have lowercase alphas and underscores + found_catalog_names = {c["tap_stream_id"] for c in found_catalogs} + self.assertTrue( + all([re.fullmatch(r"[a-z_]+", name) for name in found_catalog_names]), + logging="asserting all streams defined in catalog follow the naming convention '[a-z_]+'", + ) + + for stream in streams_to_test: + with self.subTest(stream=stream): + + # Verify the catalog is found for a given stream + catalog = next( + iter( + [ + catalog + for catalog in found_catalogs + if catalog["stream_name"] == stream + ] + ) + ) + self.assertIsNotNone( + catalog, logging="Asserting entry is present in catalog" + ) + + # Collecting expected values + expected_primary_keys = self.expected_pks()[stream] + expected_replication_keys = self.expected_replication_keys()[stream] + expected_automatic_fields = self.expected_automatic_fields().get(stream) + expected_replication_method = self.expected_replication_method()[stream] + + # Collecting actual values... + schema_and_metadata = menagerie.get_annotated_schema( + conn_id, catalog["stream_id"] + ) + metadata = schema_and_metadata["metadata"] + stream_properties = [ + item for item in metadata if item.get("breadcrumb") == [] + ] + actual_primary_keys = set( + stream_properties[0] + .get("metadata", {self.PRIMARY_KEYS: []}) + .get(self.PRIMARY_KEYS, []) + ) + actual_replication_keys = set( + stream_properties[0] + .get("metadata", {self.REPLICATION_KEYS: []}) + .get(self.REPLICATION_KEYS, []) + ) + actual_replication_method = ( + stream_properties[0] + .get("metadata", {self.REPLICATION_METHOD: None}) + .get(self.REPLICATION_METHOD) + ) + actual_automatic_fields = { + item.get("breadcrumb", ["properties", None])[1] + for item in metadata + if item.get("metadata").get("inclusion") == "automatic" + } + + actual_fields = [] + for md_entry in metadata: + if md_entry["breadcrumb"] != []: + actual_fields.append(md_entry["breadcrumb"][1]) + + ########################################################################## + # Metadata assertions + ########################################################################## + + # Verify there is only 1 top level breadcrumb in metadata + self.assertEqual( + len(stream_properties), + 1, + logging="Asserting there is only 1 top level breadcrumb in metadata", + ) + + # Verify there is no duplicate metadata entries + self.assertEqual( + len(actual_fields), + len(set(actual_fields)), + msg="Duplicates in the fields retrieved", + ) + + # Verify that if there is a replication key we are doing INCREMENTAL otherwise FULL + if actual_replication_keys: + self.assertEqual( + actual_replication_method, + self.INCREMENTAL, + logging=f"Asserting replication method is {self.INCREMENTAL} when replication keys are defined", + ) + else: + self.assertEqual( + actual_replication_method, + self.FULL_TABLE, + logging=f"Asserting replication method is {self.FULL_TABLE} when replication keys are not defined", + ) + + # Verify the actual replication matches our expected replication 
method + self.assertEqual( + expected_replication_method, + actual_replication_method, + logging=f"Asserting replication method is {expected_replication_method}", + ) + + # Verify replication key(s) + self.assertEqual( + expected_replication_keys, + actual_replication_keys, + logging=f"asserting replication keys are {expected_replication_keys}", + ) + + # Verify primary key(s) match expectations + self.assertSetEqual( + expected_primary_keys, + actual_primary_keys, + logging=f"asserting primary keys are {expected_primary_keys}", + ) + + # Verify that primary keys and replication keys + # are given the inclusion of automatic in metadata. + self.assertSetEqual( + expected_automatic_fields, + actual_automatic_fields, + logging=f"asserting primary and replication keys {expected_automatic_fields} are automatic", + ) + + # Verify that all other fields have inclusion of available. + # This assumes there are no unsupported fields for SaaS sources + self.assertTrue( + all( + { + item.get("metadata").get("inclusion") == "available" + for item in metadata + if item.get("breadcrumb", []) != [] + and item.get("breadcrumb", ["properties", None])[1] + not in actual_automatic_fields + } + ), + logging="Asserting non-key-property fields are available for field selection", + ) + + def test_standard_discovery(self): + """Discovery test for standard server.""" + self.eu_residency = False + self.discovery_test_run() + + def test_eu_discovery(self): + """Discovery test for EU residency server.""" + self.eu_residency = True + self.discovery_test_run() diff --git a/tests/tap_tester/test_mixpanel_pagination.py b/tests/tap_tester/test_mixpanel_pagination.py new file mode 100644 index 0000000..432ca19 --- /dev/null +++ b/tests/tap_tester/test_mixpanel_pagination.py @@ -0,0 +1,107 @@ +from math import ceil + +from tap_tester import connections, runner + +from base import TestMixPanelBase + + +class MixPanelPaginationTest(TestMixPanelBase): + + @staticmethod + def name(): + return "tap_tester_mixpanel_pagination_test" + + def pagination_test_run(self): + """ + Pagination Test + • Verify that for each stream you can get multiple pages of data + • Verify no duplicate pages are replicated + • Verify no unexpected streams were replicated + + PREREQUISITE + For EACH stream add enough data that you surpass the limit of a single + fetch of data. For instance if you have a limit of 250 records ensure + that 251 (or more) records have been posted for that stream. 
+ """ + + # Only following below 2 streams support pagination + expected_streams = {"engage", "cohort_members"} + + conn_id = connections.ensure_connection(self) + + found_catalogs = self.run_and_verify_check_mode(conn_id) + + # Table and field selection + test_catalogs_all_fields = [ + catalog + for catalog in found_catalogs + if catalog.get("tap_stream_id") in expected_streams + ] + + self.perform_and_verify_table_and_field_selection( + conn_id, test_catalogs_all_fields + ) + + record_count_by_stream = self.run_and_verify_sync(conn_id) + + synced_records = runner.get_records_from_target_output() + + # Verify no unexpected streams were replicated + synced_stream_names = set(synced_records.keys()) + self.assertSetEqual(expected_streams, synced_stream_names) + + for stream in expected_streams: + with self.subTest(stream=stream): + + # Expected values + expected_primary_keys = self.expected_pks()[stream] + + # Collect actual values + messages = synced_records.get(stream) + primary_keys_list = [ + tuple( + message["data"][expected_pk] + for expected_pk in expected_primary_keys + ) + for message in messages["messages"] + if message["action"] == "upsert" + ] + + # Verify that we can paginate with all fields selected + record_count_sync = record_count_by_stream.get(stream, 0) + self.assertGreater( + record_count_sync, + self.API_LIMIT, + msg="The number of records is not over the stream max limit", + ) + + # Chunk the replicated records (just primary keys) into expected pages + pages = [] + page_count = ceil(len(primary_keys_list) / self.API_LIMIT) + page_size = self.API_LIMIT + for page_index in range(page_count): + page_start = page_index * page_size + page_end = (page_index + 1) * page_size + pages.append(set(primary_keys_list[page_start:page_end])) + + # Verify by primary keys that data is unique for each page + for current_index, current_page in enumerate(pages): + with self.subTest(current_page_primary_keys=current_page): + + for other_index, other_page in enumerate(pages): + if current_index == other_index: + continue # Don't compare the page to itself + + self.assertTrue( + current_page.isdisjoint(other_page), + msg=f"other_page_primary_keys={other_page}", + ) + + def test_run(self): + # Pagination test for standard server + self.eu_residency = False + self.pagination_test_run() + + # Pagination test for EU residency server + self.eu_residency = True + self.pagination_test_run() diff --git a/tests/tap_tester/test_start_date.py b/tests/tap_tester/test_mixpanel_start_date.py similarity index 50% rename from tests/tap_tester/test_start_date.py rename to tests/tap_tester/test_mixpanel_start_date.py index 560593a..8a5018b 100644 --- a/tests/tap_tester/test_start_date.py +++ b/tests/tap_tester/test_mixpanel_start_date.py @@ -1,6 +1,5 @@ -import tap_tester.connections as connections -import tap_tester.runner as runner from base import TestMixPanelBase +from tap_tester import connections, runner, LOGGER class MixPanelStartDateTest(TestMixPanelBase): @@ -9,12 +8,12 @@ class MixPanelStartDateTest(TestMixPanelBase): @staticmethod def name(): - return "mix_panel_start_date_test" + return "tap_tester_mixpanel_start_date_test" def start_date_test_run(self): - """Instantiate start date according to the desired data set and run the test""" + """Instantiate start date according to the desired data set and run the test.""" - self.start_date_1 = self.get_properties().get('start_date') + self.start_date_1 = self.get_properties().get("start_date") self.start_date_2 = 
self.timedelta_formatted(self.start_date_1, days=15) self.start_date = self.start_date_1 @@ -25,19 +24,23 @@ def start_date_test_run(self): # First Sync ########################################################################## - # instantiate connection + # Instantiate connection conn_id_1 = connections.ensure_connection(self) - # run check mode + # Run check mode found_catalogs_1 = self.run_and_verify_check_mode(conn_id_1) - # table and field selection - test_catalogs_1_all_fields = [catalog for catalog in found_catalogs_1 - if catalog.get('tap_stream_id') in expected_streams] + # Table and field selection + test_catalogs_1_all_fields = [ + catalog + for catalog in found_catalogs_1 + if catalog.get("tap_stream_id") in expected_streams + ] self.perform_and_verify_table_and_field_selection( - conn_id_1, test_catalogs_1_all_fields, select_all_fields=True) + conn_id_1, test_catalogs_1_all_fields, select_all_fields=True + ) - # run initial sync + # Run initial sync record_count_by_stream_1 = self.run_and_verify_sync(conn_id_1) synced_records_1 = runner.get_records_from_target_output() @@ -45,97 +48,115 @@ def start_date_test_run(self): # Update START DATE Between Syncs ########################################################################## - print("REPLICATION START DATE CHANGE: {} ===>>> {} ".format( - self.start_date, self.start_date_2)) + LOGGER.info( + f"REPLICATION START DATE CHANGE: {self.start_date} ===>>> {self.start_date_2} " + ) self.start_date = self.start_date_2 ########################################################################## # Second Sync ########################################################################## - # create a new connection with the new start_date - conn_id_2 = connections.ensure_connection( - self, original_properties=False) + # Create a new connection with the new start_date + conn_id_2 = connections.ensure_connection(self, original_properties=False) - # run check mode + # Run check mode found_catalogs_2 = self.run_and_verify_check_mode(conn_id_2) - # table and field selection - test_catalogs_2_all_fields = [catalog for catalog in found_catalogs_2 - if catalog.get('tap_stream_id') in expected_streams] + # Table and field selection + test_catalogs_2_all_fields = [ + catalog + for catalog in found_catalogs_2 + if catalog.get("tap_stream_id") in expected_streams + ] self.perform_and_verify_table_and_field_selection( - conn_id_2, test_catalogs_2_all_fields, select_all_fields=True) + conn_id_2, test_catalogs_2_all_fields, select_all_fields=True + ) - # run sync + # Run sync record_count_by_stream_2 = self.run_and_verify_sync(conn_id_2) synced_records_2 = runner.get_records_from_target_output() for stream in expected_streams: with self.subTest(stream=stream): - # expected values + # Expected values expected_primary_keys = self.expected_pks()[stream] - expected_start_date_1 = self.timedelta_formatted( - self.start_date_1) - expected_start_date_2 = self.timedelta_formatted( - self.start_date_2) + expected_metadata = self.expected_metadata()[stream] + expected_start_date_1 = self.timedelta_formatted(self.start_date_1) + expected_start_date_2 = self.timedelta_formatted(self.start_date_2) - # collect information for assertions from syncs 1 & 2 base on expected values + # Collect information for assertions from syncs 1 & 2 base on expected values record_count_sync_1 = record_count_by_stream_1.get(stream, 0) record_count_sync_2 = record_count_by_stream_2.get(stream, 0) - primary_keys_list_1 = [tuple(message.get('data').get(expected_pk) for expected_pk in 
expected_primary_keys) - for message in synced_records_1.get(stream, {}).get('messages', []) - if message.get('action') == 'upsert'] - primary_keys_list_2 = [tuple(message.get('data').get(expected_pk) for expected_pk in expected_primary_keys) - for message in synced_records_2.get(stream, {}).get('messages', []) - if message.get('action') == 'upsert'] + primary_keys_list_1 = [ + tuple( + message.get("data").get(expected_pk) + for expected_pk in expected_primary_keys + ) + for message in synced_records_1.get(stream, {}).get("messages", []) + if message.get("action") == "upsert" + ] + primary_keys_list_2 = [ + tuple( + message.get("data").get(expected_pk) + for expected_pk in expected_primary_keys + ) + for message in synced_records_2.get(stream, {}).get("messages", []) + if message.get("action") == "upsert" + ] primary_keys_sync_1 = set(primary_keys_list_1) primary_keys_sync_2 = set(primary_keys_list_2) - if self.is_incremental(stream): + if expected_metadata.get(self.OBEYS_START_DATE): - # collect information specific to incremental streams from syncs 1 & 2 + # Collect information specific to incremental streams from syncs 1 & 2 expected_replication_key = next( - iter(self.expected_replication_keys().get(stream, []))) - replication_dates_1 = [row.get('data').get(expected_replication_key) for row in - synced_records_1.get( - stream, {'messages': []}).get('messages', []) - if row.get('data')] - replication_dates_2 = [row.get('data').get(expected_replication_key) for row in - synced_records_2.get( - stream, {'messages': []}).get('messages', []) - if row.get('data')] + iter(self.expected_replication_keys().get(stream, [])) + ) + replication_dates_1 = [ + row.get("data").get(expected_replication_key) + for row in synced_records_1.get(stream, {"messages": []}).get( + "messages", [] + ) + if row.get("data") + ] + replication_dates_2 = [ + row.get("data").get(expected_replication_key) + for row in synced_records_2.get(stream, {"messages": []}).get( + "messages", [] + ) + if row.get("data") + ] # # Verify replication key is greater or equal to start_date for sync 1 for replication_date in replication_dates_1: self.assertGreaterEqual( - self.parse_date(replication_date), self.parse_date( - expected_start_date_1), - msg="Report pertains to a date prior to our start date.\n" + - "Sync start_date: {}\n".format(expected_start_date_1) + - "Record date: {} ".format(replication_date) + self.parse_date(replication_date), + self.parse_date(expected_start_date_1), + msg="Report pertains to a date prior to our start date.\n" + + f"Sync start_date: {expected_start_date_1}\n" + + f"Record date: {replication_date} ", ) # Verify replication key is greater or equal to start_date for sync 2 for replication_date in replication_dates_2: self.assertGreaterEqual( - self.parse_date(replication_date), self.parse_date( - expected_start_date_2), - msg="Report pertains to a date prior to our start date.\n" + - "Sync start_date: {}\n".format(expected_start_date_2) + - "Record date: {} ".format(replication_date) + self.parse_date(replication_date), + self.parse_date(expected_start_date_2), + msg="Report pertains to a date prior to our start date.\n" + + f"Sync start_date: {expected_start_date_2}\n" + + f"Record date: {replication_date} ", ) # Verify the number of records replicated in sync 1 is greater than the number # of records replicated in sync 2 - self.assertGreater(record_count_sync_1, - record_count_sync_2) + self.assertGreater(record_count_sync_1, record_count_sync_2) # Verify the records replicated in sync 2 were also 
replicated in sync 1
-                    self.assertTrue(
-                        primary_keys_sync_2.issubset(primary_keys_sync_1))
+                    self.assertTrue(primary_keys_sync_2.issubset(primary_keys_sync_1))
 
                 else:
 
@@ -144,14 +165,13 @@ def start_date_test_run(self):
                     self.assertEqual(record_count_sync_2, record_count_sync_1)
 
                     # Verify by primary key the same records are replicated in the 1st and 2nd syncs
-                    self.assertSetEqual(primary_keys_sync_1,
-                                        primary_keys_sync_2)
+                    self.assertSetEqual(primary_keys_sync_1, primary_keys_sync_2)
 
     def test_run(self):
-        #Start date test for standard server
+        # Start date test for standard server
         self.eu_residency = False
         self.start_date_test_run()
 
-        #Start date test for EU recidency server
+        # Start date test for EU residency server
         self.eu_residency = True
         self.start_date_test_run()
diff --git a/tests/unittests/test_error_handling.py b/tests/unittests/test_error_handling.py
index f9776aa..d4fc94e 100644
--- a/tests/unittests/test_error_handling.py
+++ b/tests/unittests/test_error_handling.py
@@ -1,17 +1,20 @@
 import unittest
 from unittest import mock
+from parameterized import parameterized
 
 import requests
+
 from tap_mixpanel import client
 
-# mock responce
+# Mock response
 REQUEST_TIMEOUT = 300
 
-class Mockresponse:
-    """
-    Mocked standard HTTPResponse to test error handling.
-    """
-    def __init__(self, resp, status_code, content=[""], headers=None, raise_error=False, text={}):
+class MockResponse:
+    """Mocked standard HTTPResponse to test error handling."""
+
+    def __init__(
+        self, status_code, resp = "", content=[""], headers=None, raise_error=True, text={}
+    ):
         self.json_data = resp
         self.status_code = status_code
         self.content = content
@@ -20,371 +23,182 @@ def __init__(self, resp, status_code, content=[""], headers=None, raise_error=Fa
         self.text = text
         self.reason = "error"
 
-    def prepare(self):
-        return (self.json_data, self.status_code, self.content, self.headers, self.raise_error)
-
     def raise_for_status(self):
+        """If an error occurs, this method raises an HTTPError.
+
+        Raises:
+            requests.HTTPError: Mock http error.
+
+        Returns:
+            int: Returns the status code if no error occurred.
+        """
         if not self.raise_error:
             return self.status_code
 
         raise requests.HTTPError("mock sample message")
 
     def json(self):
+        """Returns a JSON object of the result."""
        return self.text
 
-# Mock response for timezone related error messages
+
 def get_mock_http_response(content, status_code):
+    """Mock response for timezone related error messages.
+
+    Args:
+        content (str): The content for the mock response.
+        status_code (int): The HTTP status code for the mock response.
+
+    Returns:
+        requests.Response: Custom mock response.
+    """
     response = requests.Response()
     response.status_code = status_code
     response.headers = {}
     response._content = content.encode()
     return response
 
-@mock.patch('time.sleep', return_value=None)  # Mock time.sleep to reduce the time
+
+# Mock time.sleep to reduce the time
+@mock.patch("time.sleep", return_value=None)
 class TestMixpanelErrorHandling(unittest.TestCase):
+    """
+    Test case to verify that the custom error message and
+    backoff are implemented for the errors covered in these tests.
+    """
 
-    def mock_send_400(*args, **kwargs):
-        return Mockresponse("", 400, raise_error=True)
+    timeout_400_error = {
+        "request": "/api/2.0/engage/revenue?from_date=2020-02-01&to_date=2020-03-01",
+        "error": "Timeout Error.",
+    }
 
     def mock_400_different_timezone(*args, **kwargs):
+        """Mock 400 error response with a different timezone.
+
+        Returns:
+            requests.Response: Returns mock 400 error response.
+ """ content = " to_date cannot be later than today" return get_mock_http_response(content, 400) - def mock_send_401(*args, **kwargs): - return Mockresponse("", 401, raise_error=True) - - def mock_send_402(*args, **kwargs): - return Mockresponse("", 402, raise_error=True) - - def mock_send_403(*args, **kwargs): - return Mockresponse("", 403, raise_error=True) - - def mock_send_404(*args, **kwargs): - return Mockresponse("", 404, raise_error=True) - - def mock_send_429(*args, **kwargs): - return Mockresponse("", 429, raise_error=True) - - def mock_send_500(*args, **kwargs): - return Mockresponse("", 500, raise_error=True) - - def mock_send_501(*args, **kwargs): - return Mockresponse("", 501, raise_error=True) - def mock_send_error(*args, **kwargs): + """Mock error response with description in \'error\' field. + + Returns: + requests.Response: Returns mock 404 error response. + """ content = '{"error": "Resource not found error message from API response field \'error\'."}' return get_mock_http_response(content, 404) def mock_send_message(*args, **kwargs): - content = '{"message": "Resource not found error message from API response field \'message\'."}' - return get_mock_http_response(content, 404) + """Mock error response with description in \'message\' field. - @mock.patch("requests.Session.request", side_effect=mock_send_400) - def test_request_with_handling_for_400_exception_handling(self, mock_send_400, mock_sleep): + Returns: + requests.Response: Returns mock 404 error response. """ - Test that `perform_request` method handle 400 error with proper message - """ - try: - mock_client = client.MixpanelClient(api_secret="mock_api_secret", api_domain="mock_api_domain", request_timeout=REQUEST_TIMEOUT) - mock_client.perform_request('GET') - except client.MixpanelBadRequestError as e: - expected_error_message = "HTTP-error-code: 400, Error: A validation exception has occurred.(Please verify your credentials.)" - # Verifying the message formed for the custom exception - self.assertEqual(str(e), expected_error_message) - - @mock.patch("requests.Session.request", side_effect=mock_400_different_timezone) - def test_request_with_handling_for_400_for_different_timezone(self, mock_400_different_timezone, mock_sleep): - """ - Test that `perform_request` method handle 400 error with proper message for different timezone - """ - try: - mock_client = client.MixpanelClient(api_secret="mock_api_secret", api_domain="mock_api_domain", request_timeout=REQUEST_TIMEOUT) - mock_client.perform_request('GET') - except client.MixpanelBadRequestError as e: - expected_error_message = "HTTP-error-code: 400, Error: A validation exception has occurred. Please validate the timezone with the MixPanel UI under project settings." - # Verifying the message formed for the custom exception - self.assertEqual(str(e), expected_error_message) + content = '{"message": "Resource not found error message from API response field \'message\'."}' + return get_mock_http_response(content, 404) + @parameterized.expand([ + ["400 error", 400, MockResponse(400), client.MixpanelBadRequestError, "A validation exception has occurred.(Please verify your credentials.)"], + ["400 different timezone error", 400, mock_400_different_timezone(), client.MixpanelBadRequestError, "A validation exception has occurred. 
Please validate the timezone with the MixPanel UI under project settings."], + ["400 timeout error", 400, MockResponse(400, text=timeout_400_error), client.MixpanelBadRequestError, "Timeout Error.(Please verify your credentials.)"], + ["401 error", 401, MockResponse(401), client.MixpanelUnauthorizedError, "Invalid authorization credentials."], + ["402 error", 402, MockResponse(402), client.MixpanelPaymentRequiredError, "Your current plan does not allow API calls. Payment is required to complete the operation."], + ["403 error", 403, MockResponse(403), client.MixpanelForbiddenError, "User does not have permission to access the resource."], + ["404 error", 404, MockResponse(404), client.MixpanelNotFoundError, "The resource you have specified cannot be found."], + ["404 error", 404, mock_send_error(), client.MixpanelNotFoundError, "Resource not found error message from API response field 'error'."], + ["404 error", 404, mock_send_message(), client.MixpanelNotFoundError, "Resource not found error message from API response field 'message'."], + ["429 error", 429, MockResponse(429), client.Server429Error, "The API rate limit for your organization/application pairing has been exceeded."], + ]) @mock.patch("requests.Session.request") - def test_request_with_handling_for_400_timeout_error_handling(self, mock_request, mock_sleep): - """ - Test that `perform_request` method handle 400 error with timeout error message in case of `error` field in response - """ - error = {"request": "/api/2.0/engage/revenue?from_date=2020-02-01&to_date=2020-03-01", "error": "Timeout Error."} - mock_request.return_value = Mockresponse("", 400, raise_error=True, text=error) - try: - mock_client = client.MixpanelClient(api_secret="mock_api_secret", api_domain="mock_api_domain", request_timeout=REQUEST_TIMEOUT) - mock_client.perform_request('GET') - except client.MixpanelBadRequestError as e: - expected_error_message = "HTTP-error-code: 400, Error: Timeout Error.(Please verify your credentials.)" - # Verifying the message formed for the timeout error - self.assertEqual(str(e), expected_error_message) - - @mock.patch("requests.Session.request", side_effect=mock_send_401) - def test_request_with_handling_for_401_exception_handling(self, mock_send_401, mock_sleep): - """ - Test that `perform_request` method handle 401 error with proper message - """ - try: - mock_client = client.MixpanelClient(api_secret="mock_api_secret", api_domain="mock_api_domain", request_timeout=REQUEST_TIMEOUT) - mock_client.perform_request('GET') - except client.MixpanelUnauthorizedError as e: - expected_error_message = "HTTP-error-code: 401, Error: Invalid authorization credentials." - # Verifying the message formed for the custom exception - self.assertEqual(str(e), expected_error_message) - - @mock.patch("requests.Session.request", side_effect=mock_send_402) - def test_request_with_handling_for_402_exception_handling(self, mock_send_402, mock_sleep): - """ - Test that `perform_request` method handle 402 error with proper message - """ - try: - mock_client = client.MixpanelClient(api_secret="mock_api_secret", api_domain="mock_api_domain", request_timeout=REQUEST_TIMEOUT) - mock_client.perform_request('GET') - except client.MixpanelPaymentRequiredError as e: - expected_error_message = "HTTP-error-code: 402, Error: Your current plan does not allow API calls. Payment is required to complete the operation." 
- # Verifying the message formed for the custom exception - self.assertEqual(str(e), expected_error_message) - - @mock.patch("requests.Session.request", side_effect=mock_send_403) - def test_request_with_handling_for_403_exception_handling(self, mock_send_403, mock_sleep): - """ - Test that `perform_request` method handle 403 error with proper message - """ - try: - mock_client = client.MixpanelClient(api_secret="mock_api_secret", api_domain="mock_api_domain", request_timeout=REQUEST_TIMEOUT) - mock_client.perform_request('GET') - except client.MixpanelForbiddenError as e: - expected_error_message = "HTTP-error-code: 403, Error: User does not have permission to access the resource." - # Verifying the message formed for the custom exception - self.assertEqual(str(e), expected_error_message) - - @mock.patch("requests.Session.request", side_effect=mock_send_404) - def test_request_with_handling_for_404_exception_handling(self, mock_send_404, mock_sleep): - """ - Test that `perform_request` method handle 404 error with proper message - """ - try: - mock_client = client.MixpanelClient(api_secret="mock_api_secret", api_domain="mock_api_domain", request_timeout=REQUEST_TIMEOUT) - mock_client.perform_request('GET') - except client.MixpanelNotFoundError as e: - expected_error_message = "HTTP-error-code: 404, Error: The resource you have specified cannot be found." - # Verifying the message formed for the custom exception - self.assertEqual(str(e), expected_error_message) - - @mock.patch("requests.Session.request", side_effect=mock_send_429) - def test_request_with_handling_for_429_exception_handling(self, mock_send_429, mock_sleep): - """ - Test that `perform_request` method handle 429 error with proper message - """ - try: - mock_client = client.MixpanelClient(api_secret="mock_api_secret", api_domain="mock_api_domain", request_timeout=REQUEST_TIMEOUT) - mock_client.perform_request('GET') - except client.Server429Error as e: - expected_error_message = "HTTP-error-code: 429, Error: The API rate limit for your organisation/application pairing has been exceeded." - # Verifying the message formed for the custom exception - self.assertEqual(str(e), expected_error_message) - - @mock.patch("requests.Session.request", side_effect=mock_send_500) - def test_request_with_handling_for_500_exception_handling(self, mock_send_500, mock_sleep): - """ - Test that `perform_request` method handle 500 error with proper message - """ - with self.assertRaises(client.MixpanelInternalServiceError): - mock_client = client.MixpanelClient(api_secret="mock_api_secret", api_domain="mock_api_domain", request_timeout=REQUEST_TIMEOUT) - mock_client.perform_request('GET') - - @mock.patch("requests.Session.request", side_effect=mock_send_501) - def test_request_with_handling_for_501_exception_handling(self, mock_send_501, mock_sleep): - """ - Test that `perform_request` method handle 501 error with proper message - """ - with self.assertRaises(client.Server5xxError): - mock_client = client.MixpanelClient(api_secret="mock_api_secret", api_domain="mock_api_domain", request_timeout=REQUEST_TIMEOUT) - mock_client.perform_request('GET') - - @mock.patch("requests.Session.request", side_effect=mock_send_error) - def test_request_with_handling_for_404_exception_handling_error(self, mock_send_error, mock_sleep): - ''' - Verify that if 'error' field is present in API response then it should be used as error message. 
- ''' - try: - mock_client = client.MixpanelClient(api_secret="mock_api_secret", api_domain="mock_api_domain", request_timeout=REQUEST_TIMEOUT) - mock_client.perform_request('GET') - except client.MixpanelNotFoundError as e: - expected_error_message = "HTTP-error-code: 404, Error: Resource not found error message from API response field 'error'." - # Verifying the message retrived from 'error' field of API response - self.assertEqual(str(e), expected_error_message) - - @mock.patch("requests.Session.request", side_effect=mock_send_message) - def test_request_with_handling_for_404_exception_handling_message(self, mock_send_message, mock_sleep): - ''' - Verify that if 'message' field is present in API response then it should be used as error message. - ''' - try: - mock_client = client.MixpanelClient(api_secret="mock_api_secret", api_domain="mock_api_domain", request_timeout=REQUEST_TIMEOUT) - mock_client.perform_request('GET') - except client.MixpanelNotFoundError as e: - expected_error_message = "HTTP-error-code: 404, Error: Resource not found error message from API response field 'message'." - # Verifying the message retrived from 'message' field of API response - self.assertEqual(str(e), expected_error_message) - - @mock.patch("requests.Session.get", side_effect=mock_send_400) - def test_check_access_with_handling_for_400_exception_handling(self, mock_send_400, mock_sleep): - """ - Test that `check_access` method handle 404 error with proper message - """ - try: - tap_stream_id = "tap_mixpanel" - mock_client = client.MixpanelClient(api_secret="mock_api_secret", api_domain="mock_api_domain", request_timeout=REQUEST_TIMEOUT) - mock_client.check_access() - except client.MixpanelBadRequestError as e: - expected_error_message = "HTTP-error-code: 400, Error: A validation exception has occurred.(Please verify your credentials.)" - # Verifying the message formed for the custom exception - self.assertEqual(str(e), expected_error_message) - - @mock.patch("requests.Session.get", side_effect=mock_400_different_timezone) - def test_check_access_with_handling_for_400_for_different_timezone(self, mock_400_different_timezone, mock_sleep): - """ - Test that `check_access` method handle 404 error with proper message for different timezone - """ - try: - tap_stream_id = "tap_mixpanel" - mock_client = client.MixpanelClient(api_secret="mock_api_secret", api_domain="mock_api_domain", request_timeout=REQUEST_TIMEOUT) - mock_client.check_access() - except client.MixpanelBadRequestError as e: - expected_error_message = "HTTP-error-code: 400, Error: A validation exception has occurred. Please validate the timezone with the MixPanel UI under project settings." - # Verifying the message formed for the custom exception - self.assertEqual(str(e), expected_error_message) - + def test_perform_request_exception_handling( + self, test_name, error_code, mock_response, error, error_message, mock_request, mock_sleep, + ): + """ + Test that `perform_request` method handle error with proper message. 
+ """ + mock_request.return_value = mock_response + mock_client = client.MixpanelClient( + api_secret="mock_api_secret", + api_domain="mock_api_domain", + request_timeout=REQUEST_TIMEOUT, + ) + with self.assertRaises(error) as e: + mock_client.perform_request("GET") + + expected_error_message = f"HTTP-error-code: {error_code}, Error: {error_message}" + + # Verifying the message formed for the custom exception + self.assertEqual(str(e.exception), expected_error_message) + + @parameterized.expand([ + ["400 error", 400, MockResponse(400), client.MixpanelBadRequestError, "A validation exception has occurred.(Please verify your credentials.)"], + ["400 different timezone error", 400, mock_400_different_timezone(), client.MixpanelBadRequestError, "A validation exception has occurred. Please validate the timezone with the MixPanel UI under project settings."], + ["400 timeout error", 400, MockResponse(400, text=timeout_400_error), client.MixpanelBadRequestError, "Timeout Error.(Please verify your credentials.)"], + ["401 error", 401, MockResponse(401), client.MixpanelUnauthorizedError, "Invalid authorization credentials."], + ["403 error", 403, MockResponse(403), client.MixpanelForbiddenError, "User does not have permission to access the resource."], + ["404 error", 404, MockResponse(404), client.MixpanelNotFoundError, "The resource you have specified cannot be found."], + ["404 error", 404, mock_send_error(), client.MixpanelNotFoundError, "Resource not found error message from API response field 'error'."], + ["404 error", 404, mock_send_message(), client.MixpanelNotFoundError, "Resource not found error message from API response field 'message'."], + ["429 error", 429, MockResponse(429), client.Server429Error, "The API rate limit for your organization/application pairing has been exceeded."], + ["500 error", 500, MockResponse(500), client.MixpanelInternalServiceError, "Server encountered an unexpected condition that prevented it from fulfilling the request."], + ["501 error", 501, MockResponse(501), client.MixpanelError, "Unknown Error"], + ]) @mock.patch("requests.Session.get") - def test_check_access_with_handling_for_400_timeout_error_handling(self, mock_request, mock_sleep): - """ - Test that `check_access` method handle 404 error with timeout error message in case of `error` field in response - """ - error = {"request": "/api/2.0/engage/revenue?from_date=2020-02-01&to_date=2020-03-01", "error": "Timeout Error."} - mock_request.return_value = Mockresponse("", 400, raise_error=True, text=error) - try: - mock_client = client.MixpanelClient(api_secret="mock_api_secret", api_domain="mock_api_domain", request_timeout=REQUEST_TIMEOUT) - mock_client.check_access() - except client.MixpanelBadRequestError as e: - expected_error_message = "HTTP-error-code: 400, Error: Timeout Error.(Please verify your credentials.)" - # Verifying the message formed for the timeout error - self.assertEqual(str(e), expected_error_message) - - @mock.patch("requests.Session.request", side_effect=mock_send_401) - def test_check_access_with_handling_for_401_exception_handling(self, mock_send_401, mock_sleep): - """ - Test that `check_access` method handle 401 error with proper message - """ - try: - mock_client = client.MixpanelClient(api_secret="mock_api_secret", api_domain="mock_api_domain", request_timeout=REQUEST_TIMEOUT) + def test_check_access_exception_handling( + self, test_name, error_code, mock_response, error, error_message, mock_request, mock_sleep, + ): + """ + Test that `check_access` method handle error with 
proper message. + """ + mock_request.return_value = mock_response + mock_client = client.MixpanelClient( + api_secret="mock_api_secret", + api_domain="mock_api_domain", + request_timeout=REQUEST_TIMEOUT, + ) + with self.assertRaises(error) as e: mock_client.check_access() - except client.MixpanelUnauthorizedError as e: - expected_error_message = "HTTP-error-code: 401, Error: Invalid authorization credentials." - # Verifying the message formed for the custom exception - self.assertEqual(str(e), expected_error_message) - @mock.patch("requests.Session.request", side_effect=mock_send_403) - def test_check_access_with_handling_for_403_exception_handling(self, mock_send_403, mock_sleep): - """ - Test that `check_access` method handle 403 error with proper message - """ - try: - mock_client = client.MixpanelClient(api_secret="mock_api_secret", api_domain="mock_api_domain", request_timeout=REQUEST_TIMEOUT) - mock_client.check_access() - except client.MixpanelForbiddenError as e: - expected_error_message = "HTTP-error-code: 403, Error: User does not have permission to access the resource." - # Verifying the message formed for the custom exception - self.assertEqual(str(e), expected_error_message) + expected_error_message = f"HTTP-error-code: {error_code}, Error: {error_message}" - @mock.patch("requests.Session.request", side_effect=mock_send_404) - def test_check_access_with_handling_for_404_exception_handling(self, mock_send_404, mock_sleep): - """ - Test that `check_access` method handle 404 error with proper message - """ - try: - mock_client = client.MixpanelClient(api_secret="mock_api_secret", api_domain="mock_api_domain", request_timeout=REQUEST_TIMEOUT) - mock_client.check_access() - except client.MixpanelNotFoundError as e: - expected_error_message = "HTTP-error-code: 404, Error: The resource you have specified cannot be found." - # Verifying the message formed for the custom exception - self.assertEqual(str(e), expected_error_message) + # Verifying the message formed for the custom exception + self.assertEqual(str(e.exception), expected_error_message) - @mock.patch("requests.Session.request", side_effect=mock_send_429) - def test_check_access_with_handling_for_429_exception_handling(self, mock_send_429, mock_sleep): - """ - Test that `check_access` method handle 429 error with proper message - """ - try: - mock_client = client.MixpanelClient(api_secret="mock_api_secret", api_domain="mock_api_domain", request_timeout=REQUEST_TIMEOUT) - mock_client.check_access() - except client.Server429Error as e: - expected_error_message = "HTTP-error-code: 429, Error: The API rate limit for your organisation/application pairing has been exceeded." - # Verifying the message formed for the custom exception - self.assertEqual(str(e), expected_error_message) - - @mock.patch("requests.Session.request", side_effect=mock_send_500) - def test_check_access_with_handling_for_500_exception_handling(self, mock_send_500, mock_sleep): - """ - Test that `check_access` method handle 500 error with proper message - """ - try: - mock_client = client.MixpanelClient(api_secret="mock_api_secret", api_domain="mock_api_domain", request_timeout=REQUEST_TIMEOUT) - mock_client.check_access() - except client.MixpanelInternalServiceError as e: - expected_error_message = "HTTP-error-code: 500, Error: Server encountered an unexpected condition that prevented it from fulfilling the request." 
- # Verifying the message formed for the custom exception - self.assertEqual(str(e), expected_error_message) - - @mock.patch("requests.Session.request", side_effect=mock_send_501) - def test_check_access_with_handling_for_501_exception_handling(self, mock_send_501, mock_sleep): + @parameterized.expand([ + ["500 error", MockResponse(500), client.MixpanelInternalServiceError], + ["501 error", MockResponse(501), client.Server5xxError], + ]) + @mock.patch("requests.Session.request") + def test_request_with_handling_for_5xx_exception_handling( + self, test_name, mock_response, error, mock_request, mock_sleep + ): """ - Test that `check_access` method handle 501 error with proper message + Test that `perform_request` method handle 5xx error with proper message. """ - try: - mock_client = client.MixpanelClient(api_secret="mock_api_secret", api_domain="mock_api_domain", request_timeout=REQUEST_TIMEOUT) - mock_client.check_access() - except client.MixpanelError as e: - expected_error_message = "HTTP-error-code: 501, Error: Unknown Error" - # Verifying the message formed for the custom exception - self.assertEqual(str(e), expected_error_message) - - - @mock.patch("requests.Session.request", side_effect=mock_send_error) - def test_check_access_with_handling_for_404_exception_handling_error(self, mock_send_error, mock_sleep): - ''' - Verify that if 'error' field is present in API response then it should be used as error message. - ''' - try: - mock_client = client.MixpanelClient(api_secret="mock_api_secret", api_domain="mock_api_domain", request_timeout=REQUEST_TIMEOUT) - mock_client.check_access() - except client.MixpanelNotFoundError as e: - expected_error_message = "HTTP-error-code: 404, Error: Resource not found error message from API response field 'error'." - # Verifying the message retrived from 'error' field of API response - self.assertEqual(str(e), expected_error_message) - - @mock.patch("requests.Session.request", side_effect=mock_send_message) - def test_check_access_with_handling_for_404_exception_handling_message(self, mock_send_message, mock_sleep): - ''' - Verify that if 'message' field is present in API response then it should be used as error message. - ''' - try: - mock_client = client.MixpanelClient(api_secret="mock_api_secret", api_domain="mock_api_domain", request_timeout=REQUEST_TIMEOUT) - mock_client.check_access() - except client.MixpanelNotFoundError as e: - expected_error_message = "HTTP-error-code: 404, Error: Resource not found error message from API response field 'message'." - # Verifying the message retrived from 'message' field of API response - self.assertEqual(str(e), expected_error_message) + mock_request.return_value = mock_response + mock_client = client.MixpanelClient( + api_secret="mock_api_secret", + api_domain="mock_api_domain", + request_timeout=REQUEST_TIMEOUT, + ) + with self.assertRaises(error): + mock_client.perform_request("GET") @mock.patch("requests.Session.request", side_effect=requests.exceptions.Timeout) def test_check_access_handle_timeout_error(self, mock_request, mock_sleep): - ''' - Check whether the request backoffs properly for `check_access` method for 5 times in case of Timeout error. - ''' - mock_client = client.MixpanelClient(api_secret="mock_api_secret", api_domain="mock_api_domain", request_timeout=REQUEST_TIMEOUT) + """ + Check whether the request back off properly for `check_access` + method for 5 times in case of Timeout error. 
+        """
+        mock_client = client.MixpanelClient(
+            api_secret="mock_api_secret",
+            api_domain="mock_api_domain",
+            request_timeout=REQUEST_TIMEOUT,
+        )
         with self.assertRaises(client.ReadTimeoutError):
             mock_client.check_access()
-
+
         # Verify that requests.Session.request is called 5 times
-        self.assertEqual(mock_request.call_count, 5)
\ No newline at end of file
+        self.assertEqual(mock_request.call_count, 5)
diff --git a/tests/unittests/test_medium_client.py b/tests/unittests/test_medium_client.py
index ddba05b..761aad5 100644
--- a/tests/unittests/test_medium_client.py
+++ b/tests/unittests/test_medium_client.py
@@ -1,118 +1,174 @@
 from collections.abc import Generator
 from unittest import mock
-from unittest.mock import patch
 
-import backoff
 import requests
 import requests_mock
 from pytest import raises
+
 from tap_mixpanel import client
-from tap_mixpanel.client import (ReadTimeoutError, Server5xxError,
-                                 Server429Error, MixpanelInternalServiceError)
+from tap_mixpanel.client import (
+    MixpanelInternalServiceError,
+    ReadTimeoutError,
+    Server5xxError,
+    Server429Error,
+)
 from tests.configuration.fixtures import mixpanel_client
 
 
-@mock.patch('time.sleep', return_value=None)
+@mock.patch("time.sleep", return_value=None)
 def test_request_export_backoff_on_timeout(mock_sleep, mixpanel_client):
-    with requests_mock.Mocker() as m:
-        m.request('GET', 'http://test.com',
-                  exc=requests.exceptions.Timeout('Timeout on request'))
-
-        with raises(ReadTimeoutError) as ex:
-            for record in mixpanel_client.request_export('GET', url='http://test.com'):
+    """
+    Test that the request_export method of the client backs off the max number of times
+    (time.sleep called 'Max-1' times) if a timeout error occurs.
+    """
+    with requests_mock.Mocker() as mocker:
+        mocker.request(
+            "GET",
+            "http://test.com",
+            exc=requests.exceptions.Timeout("Timeout on request"),
+        )
+
+        with raises(ReadTimeoutError):
+            for record in mixpanel_client.request_export("GET", url="http://test.com"):
                 pass
 
+    # Assert backoff retry count as expected
     assert mock_sleep.call_count == client.BACKOFF_MAX_TRIES_REQUEST - 1
 
 
-@mock.patch('time.sleep', return_value=None)
+@mock.patch("time.sleep", return_value=None)
 def test_request_export_backoff_on_remote_timeout(mock_sleep, mixpanel_client):
-    with requests_mock.Mocker() as m:
-        m.request('GET', 'http://test.com', text=None, status_code=504)
-        result = mixpanel_client.request_export('GET', url='http://test.com')
-
-        with raises(Server5xxError) as ex:
-            for record in result:
+    """
+    Test that the request_export method of the client backs off the max number of times
+    (time.sleep called 'Max-1' times) if a 504 error occurs.
+    """
+    with requests_mock.Mocker() as mocker:
+        mocker.request("GET", "http://test.com", text=None, status_code=504)
+
+        with raises(Server5xxError):
+            for _ in mixpanel_client.request_export("GET", url="http://test.com"):
                 pass
 
+    # Assert backoff retry count as expected
     assert mock_sleep.call_count == client.BACKOFF_MAX_TRIES_REQUEST - 1
 
 
-@mock.patch('time.sleep', return_value=None)
+@mock.patch("time.sleep", return_value=None)
 def test_perform_request_backoff_on_remote_timeout_429(mock_sleep, mixpanel_client):
-    with requests_mock.Mocker() as m:
-        m.request('GET', 'http://test.com', text=None,
-                  content=b'error', status_code=429)
-
-        with raises(Server429Error) as ex:
-            result = mixpanel_client.perform_request(
-                'GET', url='http://test.com')
-            for record in result:
-                pass
+    """
+    Test that the perform_request method of the client backs off the max number of times
+    (time.sleep called 'Max-1' times) if a 429 error occurs.
+    """
+    with requests_mock.Mocker() as mocker:
+        mocker.request(
+            "GET", "http://test.com", text=None, content=b"error", status_code=429
+        )
+
+        with raises(Server429Error):
+            mixpanel_client.perform_request("GET", url="http://test.com")
+
+    # Assert backoff retry count as expected
     assert mock_sleep.call_count == client.BACKOFF_MAX_TRIES_REQUEST - 1
 
 
-@mock.patch('time.sleep', return_value=None)
+@mock.patch("time.sleep", return_value=None)
 def test_perform_request_backoff_on_remote_timeout_500(mock_sleep, mixpanel_client):
-    with requests_mock.Mocker() as m:
-        m.request('GET', 'http://test.com', text=None, status_code=500)
+    """
+    Test that the perform_request method of the client backs off the max number of times
+    (time.sleep called 'Max-1' times) if a 500 error occurs.
+    """
+    with requests_mock.Mocker() as mocker:
+        mocker.request("GET", "http://test.com", text=None, status_code=500)
 
-        with raises(MixpanelInternalServiceError) as ex:
-            result = mixpanel_client.perform_request(
-                'GET', url='http://test.com')
+        with raises(MixpanelInternalServiceError):
+            mixpanel_client.perform_request("GET", url="http://test.com")
 
-        for record in result:
-            pass
+    # Assert backoff retry count as expected
     assert mock_sleep.call_count == client.BACKOFF_MAX_TRIES_REQUEST - 1
 
 
-@mock.patch('time.sleep', return_value=None)
+@mock.patch("time.sleep", return_value=None)
 def test_check_access_backoff_on_remote_timeout_429(mock_sleep, mixpanel_client):
-    with requests_mock.Mocker() as m:
-        m.request('GET', 'https://mixpanel.com/api/2.0/engage',
-                  content=b'error', text=None, status_code=429)
+    """
+    Test that the check_access method of the client backs off 5 times (time.sleep called 4 times)
+    if a 429 error occurs.
+    """
+    with requests_mock.Mocker() as mocker:
+        mocker.request(
+            "GET",
+            "https://mixpanel.com/api/2.0/engage",
+            content=b"error",
+            text=None,
+            status_code=429,
+        )
+
+        with raises(Server429Error):
+            mixpanel_client.check_access()
 
-        with raises(Server429Error) as ex:
-            result = mixpanel_client.check_access()
+    # Assert backoff retry count as expected
     assert mock_sleep.call_count == 5 - 1
 
 
-@mock.patch('time.sleep', return_value=None)
+@mock.patch("time.sleep", return_value=None)
 def test_check_access_backoff_on_remote_timeout_500(mock_sleep, mixpanel_client):
-    with requests_mock.Mocker() as m:
-        m.request('GET', 'https://mixpanel.com/api/2.0/engage',
-                  content=b'error', text=None, status_code=500)
+    """
+    Test that the check_access method of the client backs off 5 times (time.sleep called 4 times)
+    if a 500 error occurs.
+    """
+    with requests_mock.Mocker() as mocker:
+        mocker.request(
+            "GET",
+            "https://mixpanel.com/api/2.0/engage",
+            content=b"error",
+            text=None,
+            status_code=500,
+        )
+
+        with raises(MixpanelInternalServiceError):
+            mixpanel_client.check_access()
 
-        with raises(MixpanelInternalServiceError) as ex:
-            result = mixpanel_client.check_access()
+    # Assert backoff retry count as expected
     assert mock_sleep.call_count == 5 - 1
 
 
-@mock.patch('time.sleep', return_value=None)
+@mock.patch("time.sleep", return_value=None)
 def test_request_backoff_on_timeout(mock_sleep, mixpanel_client):
-    with requests_mock.Mocker() as m:
-        m.request('GET', 'http://test.com',
-                  exc=requests.exceptions.Timeout('Timeout on request'))
+    """
+    Test that the `request` method of the client backs off the max number of times on a timeout.
+    """
+    with requests_mock.Mocker() as mocker:
+        mocker.request(
+            "GET",
+            "http://test.com",
+            exc=requests.exceptions.Timeout("Timeout on request"),
+        )
+
+        with raises(ReadTimeoutError):
+            mixpanel_client.request("GET", url="http://test.com")
 
-        with raises(ReadTimeoutError) as ex:
-            result = mixpanel_client.request('GET', url='http://test.com')
+    # Assert backoff retry count as expected
     assert mock_sleep.call_count == client.BACKOFF_MAX_TRIES_REQUEST - 1
 
 
 def test_request_returns_json(mixpanel_client):
-    with requests_mock.Mocker() as m:
-        m.request('GET', 'http://test.com', json={'a': 'b'})
-        result = mixpanel_client.request('GET', url='http://test.com')
-        assert result == {'a': 'b'}
+    """
+    Test that the request method of the client returns a JSON object.
+    """
+    with requests_mock.Mocker() as mocker:
+        mocker.request("GET", "http://test.com", json={"a": "b"})
+        result = mixpanel_client.request("GET", url="http://test.com")
+
+        # Verify that returned object is expected JSON.
+        assert result == {"a": "b"}
 
 
 def test_request_export_returns_generator(mixpanel_client):
-    with requests_mock.Mocker() as m:
-        m.request('GET', 'http://test.com', json={'a': 'b'})
-        result = mixpanel_client.request_export('GET', url='http://test.com')
+    """
+    Test that the request_export method of the client returns a generator object.
+    """
+    with requests_mock.Mocker() as mocker:
+        mocker.request("GET", "http://test.com", json={"a": "b"})
+        result = mixpanel_client.request_export("GET", url="http://test.com")
+
+        # Verify that returned object is a generator object.
         assert isinstance(result, Generator)
diff --git a/tests/unittests/test_request_timeout_param_value.py b/tests/unittests/test_request_timeout_param_value.py
index c6ea20d..e9ca91b 100644
--- a/tests/unittests/test_request_timeout_param_value.py
+++ b/tests/unittests/test_request_timeout_param_value.py
@@ -1,5 +1,7 @@
 import unittest
 from unittest import mock
+from parameterized import parameterized
+
 from tap_mixpanel.__init__ import main
 
 CONFIG = {
@@ -11,113 +13,95 @@
     "start_date": "2020-02-01T00:00:00Z",
     "user_agent": "tap-mixpanel ",
     "eu_residency": False,
-    "end_date": "2020-03-02T00:00:00Z"
+    "end_date": "2020-03-02T00:00:00Z",
 }
-REQUEST_TIMEOUT = 300
-REQUEST_TIMEOUT_FLOAT = 300.0
+REQUEST_TIMEOUT_DEFAULT = 300
+TIMEOUT_FLOAT = 200.0
+TIMEOUT_INT = 200
+NULL_STRING = ""
+ZERO_INT = 0
+ZERO_STRING = "0"
+STRING_INT = "200"
+
+
+class MockParseArgs:
+    """Mocked MockParseArgs class with custom state, discover, config
+    attributes to pass unit test cases."""
 
-class MockParseArgs():
-    """
-    Mocked MockParseArgs class with custom state, discover, config attributes to pass unit test cases.
-    """
     def __init__(self, state, discover, config):
         self.state = state
         self.discover = discover
         self.config = config
 
+
 class Mockresponse:
-    """
-    Mocked standard HTTPResponse.
- """ + """Mocked standard HTTPResponse.""" + def __init__(self, resp, status_code): self.json_data = resp self.status_code = status_code + HEADER = { - 'User-Agent': 'tap-mixpanel ', - 'Accept': 'application/json', - 'Authorization': 'Basic ZHVtbXlfc2VjcmV0' - } - -@mock.patch("requests.Session.request", return_value = Mockresponse("", status_code=200)) + "User-Agent": "tap-mixpanel ", + "Accept": "application/json", + "Authorization": "Basic ZHVtbXlfc2VjcmV0", +} + + +@mock.patch("requests.Session.request", return_value=Mockresponse("", status_code=200)) @mock.patch("singer.utils.parse_args") -@mock.patch("tap_mixpanel.__init__.do_discover", return_value = '') +@mock.patch("tap_mixpanel.__init__.do_discover", return_value="") class TestMixpanelRequestTimeoutParameterValue(unittest.TestCase): - """Test that tap handles different type of request_timeout parameter values""" - def test_request_timeout_for_none_param_value(self, mock_discover, mock_parse_args, mock_request): - """Test that tap handles none value of request_timeout parameter""" + """ + Test that tap handles different type of request_timeout parameter + values. + """ + + def test_request_timeout_for_none_param_value( + self, mock_discover, mock_parse_args, mock_request + ): + """Test that tap handles none value of request_timeout parameter.""" config = CONFIG.copy() - mock_parse_args.return_value = MockParseArgs(state = {}, discover = True, config=config) - r = main() + mock_parse_args.return_value = MockParseArgs( + state={}, discover=True, config=config + ) + main() # Verify that request method called with expected parameter value when"request_timeout" is None - mock_request.assert_called_with('GET','https://mixpanel.com/api/2.0/engage', allow_redirects=True, - headers=HEADER, timeout=REQUEST_TIMEOUT) - - def test_request_timeout_for_empty_param_value(self, mock_discover, mock_parse_args, mock_request): - """Test that tap handles empty value of request_timeout parameter""" - config = CONFIG.copy() - config['request_timeout'] = "" - mock_parse_args.return_value = MockParseArgs(state = {}, discover = True, config=config) - r = main() - - # Verify that request method called with expected parameter value when"request_timeout" is empty string - mock_request.assert_called_with('GET','https://mixpanel.com/api/2.0/engage', allow_redirects=True, - headers=HEADER, timeout=REQUEST_TIMEOUT) - - def test_request_timeout_for_string_param_value(self, mock_discover, mock_parse_args, mock_request): - """Test that tap handles string value of request_timeout parameter""" - config = CONFIG.copy() - config['request_timeout'] = "100" - mock_parse_args.return_value = MockParseArgs(state = {}, discover = True, config=config) - r = main() - - # Verify that request method called with expected parameter value when"request_timeout" is string - mock_request.assert_called_with('GET','https://mixpanel.com/api/2.0/engage', allow_redirects=True, - headers=HEADER, timeout=100.0) - - def test_request_timeout_for_int_param_value(self, mock_discover, mock_parse_args, mock_request): - """Test that tap handles int value of request_timeout parameter""" - config = CONFIG.copy() - config['request_timeout'] = 200 - mock_parse_args.return_value = MockParseArgs(state = {}, discover = True, config=config) - r = main() - - # Verify that request method called with expected parameter value when"request_timeout" is int - mock_request.assert_called_with('GET','https://mixpanel.com/api/2.0/engage', allow_redirects=True, - headers=HEADER, timeout=200.0) - - def 
test_request_timeout_for_float_param_value(self, mock_discover, mock_parse_args, mock_request): - """Test that tap handles float value of request_timeout parameter""" - config = CONFIG.copy() - config['request_timeout'] = REQUEST_TIMEOUT_FLOAT - mock_parse_args.return_value = MockParseArgs(state = {}, discover = True, config=config) - r = main() - - # Verify that request method called with expected parameter value when"request_timeout" is float - mock_request.assert_called_with('GET','https://mixpanel.com/api/2.0/engage', allow_redirects=True, - headers=HEADER, timeout=REQUEST_TIMEOUT_FLOAT) - - def test_request_timeout_for_zero_int_param_value(self, mock_discover, mock_parse_args, mock_request): - """Test that tap handles int 0 value of request_timeout parameter""" - config = CONFIG.copy() - config['request_timeout'] = 0 - mock_parse_args.return_value = MockParseArgs(state = {}, discover = True, config=config) - r = main() - - # Verify that request method called with expected parameter value when"request_timeout" is int 0 - mock_request.assert_called_with('GET','https://mixpanel.com/api/2.0/engage', allow_redirects=True, - headers=HEADER, timeout=REQUEST_TIMEOUT) - - def test_request_timeout_for_zero_string_param_value(self, mock_discover, mock_parse_args, mock_request): - """Test that tap handles string 0 value of request_timeout parameter""" - config = CONFIG.copy() - config['request_timeout'] = "0" - mock_parse_args.return_value = MockParseArgs(state = {}, discover = True, config=config) - r = main() + mock_request.assert_called_with( + "GET", + "https://mixpanel.com/api/2.0/engage", + allow_redirects=True, + headers=HEADER, + timeout=REQUEST_TIMEOUT_DEFAULT, + ) - # Verify that request method called with expected parameter value when"request_timeout" is string 0 - mock_request.assert_called_with('GET','https://mixpanel.com/api/2.0/engage', allow_redirects=True, - headers=HEADER, timeout=REQUEST_TIMEOUT) + @parameterized.expand([ + ["empty value", NULL_STRING, REQUEST_TIMEOUT_DEFAULT], + ["string value", STRING_INT, TIMEOUT_FLOAT], + ["integer value", TIMEOUT_INT, TIMEOUT_FLOAT], + ["float value", TIMEOUT_FLOAT, TIMEOUT_FLOAT], + ["zero value", ZERO_INT, REQUEST_TIMEOUT_DEFAULT], + ["zero(string) value", ZERO_STRING, REQUEST_TIMEOUT_DEFAULT], + ]) + def test_request_timeout( + self, mock_discover, mock_parse_args, mock_request, test_name, input_value, expected_value + ): + """Test that tap handles various request timeout values.""" + config = CONFIG.copy() + config["request_timeout"] = input_value + mock_parse_args.return_value = MockParseArgs( + state={}, discover=True, config=config + ) + main() + # Verify that request method called with expected parameter value + mock_request.assert_called_with( + "GET", + "https://mixpanel.com/api/2.0/engage", + allow_redirects=True, + headers=HEADER, + timeout=expected_value, + ) diff --git a/tests/unittests/test_support_eu_endpoints.py b/tests/unittests/test_support_eu_endpoints.py index 50eba94..3438276 100644 --- a/tests/unittests/test_support_eu_endpoints.py +++ b/tests/unittests/test_support_eu_endpoints.py @@ -1,19 +1,20 @@ import unittest from unittest import mock -from tap_mixpanel.streams import Revenue, Export -from tap_mixpanel.client import MixpanelClient + from tap_mixpanel.__init__ import main +from tap_mixpanel.client import MixpanelClient +from tap_mixpanel.streams import Export, Revenue EU_CONFIG = { - "api_secret": "dummy_secret", - "date_window_size": "30", - "attribution_window": "5", - "project_timezone": "Europe/Amsterdam", - 
"select_properties_by_default": "true", - "start_date": "2020-02-01T00:00:00Z", - "user_agent": "tap-mixpanel ", - "eu_residency": True, - "end_date": "2020-03-02T00:00:00Z" + "api_secret": "dummy_secret", + "date_window_size": "30", + "attribution_window": "5", + "project_timezone": "Europe/Amsterdam", + "select_properties_by_default": "true", + "start_date": "2020-02-01T00:00:00Z", + "user_agent": "tap-mixpanel ", + "eu_residency": True, + "end_date": "2020-03-02T00:00:00Z", } STANDARD_CONFIG = { "api_secret": "dummy_secret", @@ -24,28 +25,43 @@ "start_date": "2020-02-01T00:00:00Z", "user_agent": "tap-mixpanel ", "eu_residency": False, - "end_date": "2020-03-02T00:00:00Z" + "end_date": "2020-03-02T00:00:00Z", } -class MockStream(): + +class MockStream: + """Mock stream object class""" + def __init__(self, stream): self.stream = stream -class MockCatalog(): + +class MockCatalog: + """Mock catalog object class.""" + def __init__(self, name): self.name = name def get_selected_streams(self, state): + """Returns the list of selected stream objects.""" return [MockStream(self.name)] -class MockParseArgs(): + +class MockParseArgs: + """Mock args object class.""" + def __init__(self, state, discover, config): self.state = state self.discover = discover self.config = config -class Mockresponse: - def __init__(self, resp, status_code, content=[""], headers=None, raise_error=False, text={}): + +class MockResponse: + """Mocked standard HTTPResponse to test error handling.""" + + def __init__( + self, resp, status_code, content=[""], headers=None, raise_error=False, text={} + ): self.json_data = resp self.status_code = status_code self.content = content @@ -54,84 +70,166 @@ def __init__(self, resp, status_code, content=[""], headers=None, raise_error=Fa self.text = text self.reason = "error" - def prepare(self): - return (self.json_data, self.status_code, self.content, self.headers, self.raise_error) - def json(self): + """Returns a JSON object of the result.""" return self.text + class TestMixpanelSupportEuEndpoints(unittest.TestCase): + """ + Test that europe domain support is working. + """ @mock.patch("tap_mixpanel.client.MixpanelClient.request") @mock.patch("tap_mixpanel.streams.MixPanel.write_bookmark") @mock.patch("tap_mixpanel.streams.MixPanel.write_schema") - def test_support_eu_endpoints_except_export(self, mock_write_schema, mock_write_bookmark, mock_request): + def test_support_eu_endpoints_except_export( + self, mock_write_schema, mock_write_bookmark, mock_request + ): + """ + Test case for the streams other than export stream that, + For eu_residency europe domain base url is called. + And for eu_residency 'false' in the config, default domain URL is called. 
+ """ mock_request.return_value = {} - mock_write_schema.return_value = '' - mock_write_bookmark.return_value = '' + mock_write_schema.return_value = "" + mock_write_bookmark.return_value = "" state = {} - catalog = MockCatalog('revenue') + catalog = MockCatalog("revenue") - client = MixpanelClient('','','') + client = MixpanelClient("", "", "") revenue_obj = Revenue(client) - revenue_obj.sync(catalog=catalog, state=state, config=EU_CONFIG, start_date="2020-02-01T00:00:00Z") - - mock_request.assert_called_with(method='GET', url='https://eu.mixpanel.com/api/2.0', path='engage/revenue', - params='unit=day&from_date=2020-02-01&to_date=2020-03-02', endpoint='revenue') + revenue_obj.sync( + catalog=catalog, + state=state, + config=EU_CONFIG, + start_date="2020-02-01T00:00:00Z", + ) + + # Verify that with EU config, base url has eu-domain. + mock_request.assert_called_with( + method="GET", + url="https://eu.mixpanel.com/api/2.0", + path="engage/revenue", + params="unit=day&from_date=2020-02-01&to_date=2020-03-02", + endpoint="revenue", + ) revenue_obj = Revenue(client) - revenue_obj.sync(catalog=catalog,state=state, config=STANDARD_CONFIG, start_date="2020-02-01T00:00:00Z") - - mock_request.assert_called_with(method='GET', url='https://mixpanel.com/api/2.0', path='engage/revenue', - params='unit=day&from_date=2020-02-01&to_date=2020-03-02', endpoint='revenue') - + revenue_obj.sync( + catalog=catalog, + state=state, + config=STANDARD_CONFIG, + start_date="2020-02-01T00:00:00Z", + ) + + # Verify that with standard config, base URL has default domain. + mock_request.assert_called_with( + method="GET", + url="https://mixpanel.com/api/2.0", + path="engage/revenue", + params="unit=day&from_date=2020-02-01&to_date=2020-03-02", + endpoint="revenue", + ) @mock.patch("tap_mixpanel.client.MixpanelClient.request_export") @mock.patch("tap_mixpanel.streams.MixPanel.write_bookmark") @mock.patch("tap_mixpanel.streams.MixPanel.write_schema") - def test_support_export_eu_endpoint(self, mock_write_schema, mock_write_bookmark, mock_request_export): + def test_support_export_eu_endpoint( + self, mock_write_schema, mock_write_bookmark, mock_request_export + ): + """ + Test case for the export stream (as it has different base url) that, + For eu_residency europe domain base url is called. + And for eu_residency 'false' in the config, default domain URL is called. + """ mock_request_export.return_value = {} - mock_write_schema.return_value = '' - mock_write_bookmark.return_value = '' + mock_write_schema.return_value = "" + mock_write_bookmark.return_value = "" state = {} - catalog = MockCatalog('export') + catalog = MockCatalog("export") - client = MixpanelClient('','','') + client = MixpanelClient("", "", "") export_obj = Export(client) - export_obj.sync(catalog=catalog,state=state, config=EU_CONFIG, start_date="2020-02-01T00:00:00Z") - - mock_request_export.assert_called_with(method='GET', url='https://data-eu.mixpanel.com/api/2.0', path='export', - params='from_date=2020-02-01&to_date=2020-03-02', endpoint='export') + export_obj.sync( + catalog=catalog, + state=state, + config=EU_CONFIG, + start_date="2020-02-01T00:00:00Z", + ) + + # Verify that with EU config, base url has eu-domain. 
+ mock_request_export.assert_called_with( + method="GET", + url="https://data-eu.mixpanel.com/api/2.0", + path="export", + params="from_date=2020-02-01&to_date=2020-03-02", + endpoint="export", + ) export_obj = Export(client) - export_obj.sync(catalog=catalog,state=state, config=STANDARD_CONFIG, start_date="2020-02-01T00:00:00Z") - - mock_request_export.assert_called_with(method='GET', url='https://data.mixpanel.com/api/2.0', path='export', - params='from_date=2020-02-01&to_date=2020-03-02', endpoint='export') - + export_obj.sync( + catalog=catalog, + state=state, + config=STANDARD_CONFIG, + start_date="2020-02-01T00:00:00Z", + ) + + # Verify that with standard config, base URL has default domain. + mock_request_export.assert_called_with( + method="GET", + url="https://data.mixpanel.com/api/2.0", + path="export", + params="from_date=2020-02-01&to_date=2020-03-02", + endpoint="export", + ) @mock.patch("requests.Session.request") @mock.patch("singer.utils.parse_args") @mock.patch("tap_mixpanel.__init__.do_discover") - def test_support_eu_endpoint_in_discover(self, mock_discover, mock_parse_args, mock_request): - - mock_request.return_value = Mockresponse("", status_code=200) - mock_discover.return_value = '' - mock_parse_args.return_value = MockParseArgs(state = {}, discover = True, config=EU_CONFIG) - r = main() - + def test_support_eu_endpoint_in_discover( + self, mock_discover, mock_parse_args, mock_request + ): + """ + Test case for the discover mode, + For eu_residency europe domain base url is called. + And for eu_residency 'false' in the config, default domain URL is called. + """ + + mock_request.return_value = MockResponse("", status_code=200) + mock_discover.return_value = "" + mock_parse_args.return_value = MockParseArgs( + state={}, discover=True, config=EU_CONFIG + ) header = { - 'User-Agent': 'tap-mixpanel ', - 'Accept': 'application/json', - 'Authorization': 'Basic ZHVtbXlfc2VjcmV0' + "User-Agent": "tap-mixpanel ", + "Accept": "application/json", + "Authorization": "Basic ZHVtbXlfc2VjcmV0", } - mock_request.assert_called_with('GET','https://eu.mixpanel.com/api/2.0/engage', allow_redirects=True, - headers=header, timeout=300) - - mock_parse_args.return_value = MockParseArgs(state = {}, discover = True, config=STANDARD_CONFIG) - r = main() - mock_request.assert_called_with('GET','https://mixpanel.com/api/2.0/engage', allow_redirects=True, - headers=header, timeout=300) + main() + + # Verify that with EU config, base url has eu-domain. + mock_request.assert_called_with( + "GET", + "https://eu.mixpanel.com/api/2.0/engage", + allow_redirects=True, + headers=header, + timeout=300, + ) + + mock_parse_args.return_value = MockParseArgs( + state={}, discover=True, config=STANDARD_CONFIG + ) + main() + + # Verify that with standard config, base URL has default domain. 
+ mock_request.assert_called_with( + "GET", + "https://mixpanel.com/api/2.0/engage", + allow_redirects=True, + headers=header, + timeout=300, + ) diff --git a/tests/unittests/test_transform_event_times.py b/tests/unittests/test_transform_event_times.py index 2fba5d3..37d4108 100644 --- a/tests/unittests/test_transform_event_times.py +++ b/tests/unittests/test_transform_event_times.py @@ -1,35 +1,53 @@ -from tap_mixpanel.transform import transform_event_times -import pytz import unittest from datetime import datetime +import pytz + +from tap_mixpanel.transform import transform_event_times + UTC = pytz.utc class TestTransformEventTimes(unittest.TestCase): + """ + Test that `transform_event_times` function formats, + the Eastern and UTC formatted dates to ISO datetime. + """ def test_utc_now(self): - + """ + Testcase for the UTC timezone is converted to the given format. + """ + input_time = datetime.utcnow() - + record = {"time": input_time.timestamp()} project_timezone = "UTC" - + actual = transform_event_times(record, project_timezone) - expected = {"time": input_time.astimezone(UTC).strftime("%04Y-%m-%dT%H:%M:%S.000000Z")} - + expected = { + "time": input_time.astimezone(UTC).strftime("%04Y-%m-%dT%H:%M:%S.000000Z") + } + + # Verify that record uis converted as expected. self.assertEqual(expected, actual) - - + def test_eastern_time(self): + """ + Testcase for the eastern timezone is converted to given formate. + """ + project_timezone = "US/Eastern" EASTERN = pytz.timezone(project_timezone) # This gives us 2021-08-12T11:00:00-4:00 input_time = EASTERN.localize(datetime(2021, 8, 12, 11, 0, 0)) - + record = {"time": input_time.timestamp()} actual = transform_event_times(record, project_timezone) - - expected = {"time": input_time.astimezone(UTC).strftime("%04Y-%m-%dT%H:%M:%S.000000Z")} - self.assertEqual(expected, actual) \ No newline at end of file + expected = { + "time": input_time.astimezone(UTC).strftime("%04Y-%m-%dT%H:%M:%S.000000Z") + } + + # Verify that record uis converted as expected. 
+ self.assertEqual(expected, actual) From 40d63b1b4b03e483f6926039d974653e50019749 Mon Sep 17 00:00:00 2001 From: NevilParikh14 <92399024+NevilParikh14@users.noreply.github.com> Date: Mon, 3 Oct 2022 15:38:29 +0530 Subject: [PATCH 02/22] updated config.yml --- .circleci/config.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index d60c08a..fa4f5e9 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -30,7 +30,7 @@ jobs: command: | source /usr/local/share/virtualenvs/tap-mixpanel/bin/activate pip install coverage parameterized - python -m pytest --junitxml=junit/test-result.xml --cov=tap_mixpanel --cov-report=html tests/unittests/ + python -m pytest --junitxml=junit/test-result.xml --cov=tap_mixpanel --cov-report=html tests/unittests/ - store_test_results: path: test_output/report.xml - store_artifacts: From 48b65496d5af9dba2e73ef2accae6d0c4d097736 Mon Sep 17 00:00:00 2001 From: NevilParikh14 <92399024+NevilParikh14@users.noreply.github.com> Date: Mon, 3 Oct 2022 16:38:02 +0530 Subject: [PATCH 03/22] updated config.yml --- .circleci/config.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.circleci/config.yml b/.circleci/config.yml index fa4f5e9..74b8317 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -19,6 +19,7 @@ jobs: name: 'pylint tap' command: | source /usr/local/share/virtualenvs/tap-mixpanel/bin/activate + pip install pylint pylint tap_mixpanel -d 'broad-except,chained-comparison,empty-docstring,fixme,invalid-name,line-too-long,missing-module-docstring,no-else-raise,no-else-return,too-few-public-methods,too-many-arguments,too-many-branches,too-many-lines,too-many-locals,ungrouped-imports,too-many-public-methods,protected-access,too-many-statements,not-an-iterable' - run: name: 'JSON Validator' From 485e80407d9eb6aabc136e2b0eba92223c693267 Mon Sep 17 00:00:00 2001 From: NevilParikh14 <92399024+NevilParikh14@users.noreply.github.com> Date: Mon, 3 Oct 2022 16:42:04 +0530 Subject: [PATCH 04/22] updated base.py --- tests/tap_tester/base.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/tap_tester/base.py b/tests/tap_tester/base.py index b3e24c0..0cf4558 100644 --- a/tests/tap_tester/base.py +++ b/tests/tap_tester/base.py @@ -8,7 +8,8 @@ import dateutil.parser import pytz -from tap_tester import LOGGER, BaseCase, connections, menagerie, runner +from tap_tester import LOGGER, connections, menagerie, runner +from tap_tester.base_case import BaseCase class TestMixPanelBase(BaseCase): From 9ce1cd57dcc9e1247f9ee8a506c9b65536941878 Mon Sep 17 00:00:00 2001 From: NevilParikh14 Date: Tue, 4 Oct 2022 16:49:17 +0530 Subject: [PATCH 05/22] Reverted all_fields and pagination test --- tests/tap_tester/test_mixpanel_all_fields.py | 125 -------------- .../test_mixpanel_all_fields_pagination.py | 154 ++++++++++++++++++ tests/tap_tester/test_mixpanel_pagination.py | 107 ------------ 3 files changed, 154 insertions(+), 232 deletions(-) delete mode 100644 tests/tap_tester/test_mixpanel_all_fields.py create mode 100644 tests/tap_tester/test_mixpanel_all_fields_pagination.py delete mode 100644 tests/tap_tester/test_mixpanel_pagination.py diff --git a/tests/tap_tester/test_mixpanel_all_fields.py b/tests/tap_tester/test_mixpanel_all_fields.py deleted file mode 100644 index 84f0ee2..0000000 --- a/tests/tap_tester/test_mixpanel_all_fields.py +++ /dev/null @@ -1,125 +0,0 @@ -from tap_tester import runner, connections, menagerie - -from base import TestMixPanelBase - - -class 
MixPanelAllFieldsTest(TestMixPanelBase): - - @staticmethod - def name(): - return "tap_tester_mixpanel_all_fields_test" - - def all_fields_test(self): - """ - All Fields Test. - - • Verify that when all fields are selected more than the automatic fields are replicated. - • Verify no unexpected streams were replicated - • Verify that more than just the automatic fields are replicated for each stream. - • Verify all fields for each stream are replicated - • Verify that the automatic fields are sent to the target - """ - - # Only following below 2 streams support pagination - expected_streams = self.expected_streams() - - expected_automatic_fields = self.expected_automatic_fields() - conn_id = connections.ensure_connection(self) - - found_catalogs = self.run_and_verify_check_mode(conn_id) - - # Table and field selection - test_catalogs_all_fields = [ - catalog - for catalog in found_catalogs - if catalog.get("tap_stream_id") in expected_streams - ] - - self.perform_and_verify_table_and_field_selection( - conn_id, test_catalogs_all_fields - ) - - # Grab metadata after performing table-and-field selection to set expectations - # used for asserting all fields are replicated - stream_to_all_catalog_fields = dict() - for catalog in test_catalogs_all_fields: - stream_id, stream_name = catalog["stream_id"], catalog["stream_name"] - catalog_entry = menagerie.get_annotated_schema(conn_id, stream_id) - fields_from_field_level_md = [ - md_entry["breadcrumb"][1] - for md_entry in catalog_entry["metadata"] - if md_entry["breadcrumb"] != [] - ] - stream_to_all_catalog_fields[stream_name] = set(fields_from_field_level_md) - - self.run_and_verify_sync(conn_id) - - actual_fields_by_stream = runner.examine_target_output_for_fields() - - synced_records = runner.get_records_from_target_output() - - # Verify no unexpected streams were replicated - synced_stream_names = set(synced_records.keys()) - self.assertSetEqual(expected_streams, synced_stream_names) - - # All Fields Test - for stream in expected_streams: - with self.subTest(logging="Primary Functional Test", stream=stream): - - # Expected values - expected_all_keys = stream_to_all_catalog_fields[stream] - expected_automatic_keys = expected_automatic_fields.get(stream, set()) - - # Collect actual values - messages = synced_records.get(stream) - actual_all_keys = set() - for message in messages["messages"]: - if message["action"] == "upsert": - actual_all_keys.update(set(message["data"].keys())) - - # Verify that the automatic fields are sent to the target - self.assertTrue( - actual_fields_by_stream.get(stream, set()).issuperset( - expected_automatic_keys - ), - msg="The fields sent to the target don't include all automatic fields", - ) - - # Verify that more than just the automatic fields are replicated for each stream. - # 'cohort_members' has just 2 key and both are automatic - if stream != "cohort_members": - self.assertGreater( - len(expected_all_keys), len(expected_automatic_keys) - ) - - self.assertTrue( - expected_automatic_keys.issubset(expected_all_keys), - msg=f'{expected_automatic_keys-expected_all_keys} is not in "expected_all_keys"', - ) - - # As we can't find the below fields in the docs and also - # it won't be generated by mixpanel APIs now so expected. - if stream == "export": - expected_all_keys = expected_all_keys - { - "labels", - "sampling_factor", - "dataset", - "mp_reserved_duration_s", - "mp_reserved_origin_end", - "mp_reserved_origin_start", - "mp_reserved_event_count", - } - - # Verify all fields for each stream are replicated. 
- # Skip engage as it return records in random manner with dynamic fields. - if not stream == "engage": - self.assertSetEqual(expected_all_keys, actual_all_keys) - - def test_run(self): - # Pagination test for standard server - self.eu_residency = False - self.all_fields_test() - - # Pagination test for EU residency server - self.eu_residency = True - self.all_fields_test() diff --git a/tests/tap_tester/test_mixpanel_all_fields_pagination.py b/tests/tap_tester/test_mixpanel_all_fields_pagination.py new file mode 100644 index 0000000..c4e8034 --- /dev/null +++ b/tests/tap_tester/test_mixpanel_all_fields_pagination.py @@ -0,0 +1,154 @@ +from math import ceil + +from tap_tester import connections, menagerie, runner + +from base import TestMixPanelBase + + +class MixPanelPaginationAllFieldsTest(TestMixPanelBase): + + @staticmethod + def name(): + return "tap_tester_mixpanel_pagination_all_fields_test" + + def pagination_test_run(self): + """ + All Fields Test + • Verify that when all fields are selected more than the automatic fields are replicated. + • Verify no unexpected streams were replicated + • Verify that more than just the automatic fields are replicated for each stream. + • Verify all fields for each stream are replicated + • Verify that the automatic fields are sent to the target + Pagination Test + • Verify that for each stream you can get multiple pages of data + • Verify no duplicate pages are replicated + • Verify no unexpected streams were replicated + PREREQUISITE + For EACH stream add enough data that you surpass the limit of a single + fetch of data. For instance if you have a limit of 250 records ensure + that 251 (or more) records have been posted for that stream. + """ + + # Only following below 2 streams support pagination + streams_to_test_all_fields = self.expected_streams() + streams_to_test_pagination = {'engage', 'cohort_members'} + + expected_automatic_fields = self.expected_automatic_fields() + conn_id = connections.ensure_connection(self) + + found_catalogs = self.run_and_verify_check_mode(conn_id) + + # Table and field selection + test_catalogs_all_fields = [catalog for catalog in found_catalogs + if catalog.get('tap_stream_id') in streams_to_test_all_fields] + + self.perform_and_verify_table_and_field_selection( + conn_id, test_catalogs_all_fields) + + # Grab metadata after performing table-and-field selection to set expectations + # used for asserting all fields are replicated + stream_to_all_catalog_fields = dict() + for catalog in test_catalogs_all_fields: + stream_id, stream_name = catalog['stream_id'], catalog['stream_name'] + catalog_entry = menagerie.get_annotated_schema(conn_id, stream_id) + fields_from_field_level_md = [md_entry['breadcrumb'][1] + for md_entry in catalog_entry['metadata'] + if md_entry['breadcrumb'] != []] + stream_to_all_catalog_fields[stream_name] = set(fields_from_field_level_md) + + record_count_by_stream = self.run_and_verify_sync(conn_id) + + actual_fields_by_stream = runner.examine_target_output_for_fields() + + synced_records = runner.get_records_from_target_output() + + # Verify no unexpected streams were replicated + synced_stream_names = set(synced_records.keys()) + self.assertSetEqual(streams_to_test_all_fields, synced_stream_names) + + # All Fields Test + for stream in streams_to_test_all_fields: + with self.subTest(logging="Primary Functional Test", stream=stream): + + # Expected values + expected_all_keys = stream_to_all_catalog_fields[stream] + expected_automatic_keys = expected_automatic_fields.get(stream, set()) + + # 
Collect actual values + messages = synced_records.get(stream) + actual_all_keys = set() + for message in messages['messages']: + if message['action'] == 'upsert': + actual_all_keys.update(set(message['data'].keys())) + + # Verify that the automatic fields are sent to the target + self.assertTrue( + actual_fields_by_stream.get(stream, set()).issuperset( + expected_automatic_keys), + msg="The fields sent to the target don't include all automatic fields") + + # Verify that more than just the automatic fields are replicated for each stream. + if stream != "cohort_members": # cohort_member has just 2 key and both are automatic + self.assertGreater(len(expected_all_keys), + len(expected_automatic_keys)) + + self.assertTrue(expected_automatic_keys.issubset( + expected_all_keys), msg=f'{expected_automatic_keys-expected_all_keys} is not in "expected_all_keys"') + + # As we can't find the below fields in the docs and also + # it won't be generated by mixpanel APIs now so expected. + if stream == "export": + expected_all_keys = expected_all_keys - {'labels', 'sampling_factor', 'dataset', 'mp_reserved_duration_s', 'mp_reserved_origin_end', + 'mp_reserved_origin_start', 'mp_reserved_event_count'} + + # Verify all fields for each stream are replicated + # Skip engage as it return records in random manner with dynamic fields. + if not stream == "engage": + self.assertSetEqual(expected_all_keys, actual_all_keys) + + # Pagination Test + for stream in streams_to_test_pagination: + with self.subTest(stream=stream): + + # Expected values + expected_primary_keys = self.expected_pks()[stream] + + # Collect actual values + messages = synced_records.get(stream) + primary_keys_list = [tuple([message['data'][expected_pk] for expected_pk in expected_primary_keys]) + for message in messages['messages'] if message['action'] == 'upsert'] + + # Verify that we can paginate with all fields selected + record_count_sync = record_count_by_stream.get(stream, 0) + self.assertGreater(record_count_sync, self.API_LIMIT, + msg="The number of records is not over the stream max limit") + + # Chunk the replicated records (just primary keys) into expected pages + pages = [] + page_count = ceil(len(primary_keys_list) / self.API_LIMIT) + page_size = self.API_LIMIT + for page_index in range(page_count): + page_start = page_index * page_size + page_end = (page_index + 1) * page_size + pages.append(set(primary_keys_list[page_start:page_end])) + + # Verify by primary keys that data is unique for each page + for current_index, current_page in enumerate(pages): + with self.subTest(current_page_primary_keys=current_page): + + for other_index, other_page in enumerate(pages): + if current_index == other_index: + continue # don't compare the page to itself + + self.assertTrue( + current_page.isdisjoint(other_page), msg=f'other_page_primary_keys={other_page}' + ) + + def test_run(self): + # Pagination test for standard server + self.eu_residency = False + self.pagination_test_run() + + # Pagination test for EU residency server + self.eu_residency = True + self.pagination_test_run() diff --git a/tests/tap_tester/test_mixpanel_pagination.py b/tests/tap_tester/test_mixpanel_pagination.py deleted file mode 100644 index 432ca19..0000000 --- a/tests/tap_tester/test_mixpanel_pagination.py +++ /dev/null @@ -1,107 +0,0 @@ -from math import ceil - -from tap_tester import connections, runner - -from base import TestMixPanelBase - - -class MixPanelPaginationTest(TestMixPanelBase): - - @staticmethod - def name(): - return "tap_tester_mixpanel_pagination_test" 
- - def pagination_test_run(self): - """ - Pagination Test - • Verify that for each stream you can get multiple pages of data - • Verify no duplicate pages are replicated - • Verify no unexpected streams were replicated - - PREREQUISITE - For EACH stream add enough data that you surpass the limit of a single - fetch of data. For instance if you have a limit of 250 records ensure - that 251 (or more) records have been posted for that stream. - """ - - # Only following below 2 streams support pagination - expected_streams = {"engage", "cohort_members"} - - conn_id = connections.ensure_connection(self) - - found_catalogs = self.run_and_verify_check_mode(conn_id) - - # Table and field selection - test_catalogs_all_fields = [ - catalog - for catalog in found_catalogs - if catalog.get("tap_stream_id") in expected_streams - ] - - self.perform_and_verify_table_and_field_selection( - conn_id, test_catalogs_all_fields - ) - - record_count_by_stream = self.run_and_verify_sync(conn_id) - - synced_records = runner.get_records_from_target_output() - - # Verify no unexpected streams were replicated - synced_stream_names = set(synced_records.keys()) - self.assertSetEqual(expected_streams, synced_stream_names) - - for stream in expected_streams: - with self.subTest(stream=stream): - - # Expected values - expected_primary_keys = self.expected_pks()[stream] - - # Collect actual values - messages = synced_records.get(stream) - primary_keys_list = [ - tuple( - message["data"][expected_pk] - for expected_pk in expected_primary_keys - ) - for message in messages["messages"] - if message["action"] == "upsert" - ] - - # Verify that we can paginate with all fields selected - record_count_sync = record_count_by_stream.get(stream, 0) - self.assertGreater( - record_count_sync, - self.API_LIMIT, - msg="The number of records is not over the stream max limit", - ) - - # Chunk the replicated records (just primary keys) into expected pages - pages = [] - page_count = ceil(len(primary_keys_list) / self.API_LIMIT) - page_size = self.API_LIMIT - for page_index in range(page_count): - page_start = page_index * page_size - page_end = (page_index + 1) * page_size - pages.append(set(primary_keys_list[page_start:page_end])) - - # Verify by primary keys that data is unique for each page - for current_index, current_page in enumerate(pages): - with self.subTest(current_page_primary_keys=current_page): - - for other_index, other_page in enumerate(pages): - if current_index == other_index: - continue # Don't compare the page to itself - - self.assertTrue( - current_page.isdisjoint(other_page), - msg=f"other_page_primary_keys={other_page}", - ) - - def test_run(self): - # Pagination test for standard server - self.eu_residency = False - self.pagination_test_run() - - # Pagination test for EU residency server - self.eu_residency = True - self.pagination_test_run() From 6d374aed515dcbbd6154d67120314b2450ce6823 Mon Sep 17 00:00:00 2001 From: prijendev Date: Fri, 7 Oct 2022 10:43:48 +0530 Subject: [PATCH 06/22] Provided support of service account authentication --- tap_mixpanel/__init__.py | 6 +- tap_mixpanel/client.py | 49 +++++++++++++-- tests/configuration/fixtures.py | 2 +- tests/tap_tester/base.py | 9 +++ .../test_mixpanel_all_fields_pagination.py | 5 ++ tests/unittests/test_error_handling.py | 13 +++- .../test_request_timeout_param_value.py | 2 + .../test_service_account_authentication.py | 61 +++++++++++++++++++ tests/unittests/test_support_eu_endpoints.py | 6 +- 9 files changed, 141 insertions(+), 12 deletions(-) create mode 100644 
tests/unittests/test_service_account_authentication.py diff --git a/tap_mixpanel/__init__.py b/tap_mixpanel/__init__.py index 00c5e40..ed677a1 100644 --- a/tap_mixpanel/__init__.py +++ b/tap_mixpanel/__init__.py @@ -17,7 +17,6 @@ REQUEST_TIMEOUT = 300 REQUIRED_CONFIG_KEYS = [ "project_timezone", - "api_secret", "attribution_window", "start_date", "user_agent", @@ -72,7 +71,10 @@ def main(): api_domain = "mixpanel.com" with MixpanelClient( - parsed_args.config["api_secret"], + parsed_args.config.get("api_secret"), + parsed_args.config.get("service_account_username"), + parsed_args.config.get("service_account_secret"), + parsed_args.config.get("project_id"), api_domain, request_timeout, parsed_args.config["user_agent"], diff --git a/tap_mixpanel/client.py b/tap_mixpanel/client.py index 5bc97dc..c8660a5 100644 --- a/tap_mixpanel/client.py +++ b/tap_mixpanel/client.py @@ -134,16 +134,32 @@ class MixpanelClient: """ The client class used for making REST calls to the Mixpanel API. """ - def __init__(self, api_secret, api_domain, request_timeout, user_agent=None): + def __init__(self, api_secret, service_account_username, service_account_secret, project_id, api_domain, + request_timeout, user_agent=None): self.__api_secret = api_secret + self.__service_account_username = service_account_username + self.__service_account_secret = service_account_secret + self.__project_id = project_id self.__api_domain = api_domain self.__request_timeout = request_timeout self.__user_agent = user_agent self.__session = requests.Session() self.__verified = False + self.auth_header = None self.disable_engage_endpoint = False def __enter__(self): + """ + Set auth_header with provided credentials. If credentials is not provided, then raise the exception. + """ + if self.__api_secret: + self.auth_header = f"Basic {str(base64.urlsafe_b64encode(self.__api_secret.encode('utf-8')), 'utf-8')}" + elif self.__service_account_username and self.__service_account_secret: + service_account_auth = f"{self.__service_account_username}:{self.__service_account_secret}" + self.auth_header = f"Basic {str(base64.urlsafe_b64encode(service_account_auth.encode('utf-8')), 'utf-8')}" + else: + raise Exception("Error: Missing api_secret or service account username/secret in tap config.json") + self.__verified = self.check_access() return self @@ -167,9 +183,8 @@ def check_access(self): bool: Returns true if credentials are verified. 
(else raises Exception) """ - if self.__api_secret is None: - raise Exception("Error: Missing api_secret in tap config.json.") headers = {} + params = {} # Endpoint: simple API call to return a single record (org settings) to test access url = f"https://{self.__api_domain}/api/2.0/engage" if self.__user_agent: @@ -177,14 +192,24 @@ def check_access(self): headers["Accept"] = "application/json" headers[ "Authorization" - ] = f"Basic {str(base64.urlsafe_b64encode(self.__api_secret.encode('utf-8')), 'utf-8')}" + ] = self.auth_header + if self.__project_id: + params["project_id"] = self.__project_id try: response = self.__session.get( url=url, + params=params, timeout=self.__request_timeout, # Request timeout parameter headers=headers, ) + + if response.status_code == 403: + LOGGER.error( + "HTTP-error-code: 403, Error: User is not a member of this project: %s or this project is invalid", + self.__project_id) + raise MixpanelForbiddenError from None + except requests.exceptions.Timeout as err: LOGGER.error("TIMEOUT ERROR: %s", str(err)) raise ReadTimeoutError from None @@ -288,9 +313,15 @@ def request(self, method, url=None, path=None, params=None, json=None, **kwargs) if method == "POST": kwargs["headers"]["Content-Type"] = "application/json" + if self.__project_id: + if isinstance(params, dict): + params['project_id'] = self.__project_id + else: + params = f"{params}&project_id={self.__project_id}" + kwargs["headers"][ "Authorization" - ] = f"Basic {str(base64.urlsafe_b64encode(self.__api_secret.encode('utf-8')), 'utf-8')}" + ] = self.auth_header with metrics.http_request_timer(endpoint) as timer: response = self.perform_request( method=method, url=url, params=params, json=json, **kwargs @@ -330,6 +361,12 @@ def request_export( else: endpoint = "export" + if self.__project_id: + if isinstance(params, dict): + params['project_id'] = self.__project_id + else: + params = f"{params}&project_id={self.__project_id}" + if "headers" not in kwargs: kwargs["headers"] = {} @@ -343,7 +380,7 @@ def request_export( kwargs["headers"][ "Authorization" - ] = f"Basic {str(base64.urlsafe_b64encode(self.__api_secret.encode('utf-8')), 'utf-8')}" + ] = self.auth_header with metrics.http_request_timer(endpoint) as timer: response = self.perform_request( method=method, url=url, params=params, json=json, stream=True, **kwargs diff --git a/tests/configuration/fixtures.py b/tests/configuration/fixtures.py index 869f533..a7cf8b4 100644 --- a/tests/configuration/fixtures.py +++ b/tests/configuration/fixtures.py @@ -8,7 +8,7 @@ def mixpanel_client(): # Support of request_timeout have been added. 
# So, now MixpanelClient accept request_timeout parameter which is mandatory mixpanel_client = MixpanelClient( - "API_SECRET", api_domain="mixpanel.com", request_timeout=1 + "API_SECRET", "username", "secret", "project_id", api_domain="mixpanel.com", request_timeout=1 ) # Pass extra request_timeout parameter mixpanel_client._MixpanelClient__verified = True return mixpanel_client diff --git a/tests/tap_tester/base.py b/tests/tap_tester/base.py index 0cf4558..c130975 100644 --- a/tests/tap_tester/base.py +++ b/tests/tap_tester/base.py @@ -29,6 +29,7 @@ class TestMixPanelBase(BaseCase): start_date = "" end_date = "" eu_residency = True + service_account_authentication = False def tap_name(self): """The name of the tap.""" @@ -81,6 +82,10 @@ def setUp(self): missing_envs = [] if self.eu_residency: creds = {"api_secret": "TAP_MIXPANEL_EU_RESIDENCY_API_SECRET"} + elif self.service_account_authentication: + creds = {"service_account_username": "TAP_MIXPANEL_SERVICE_ACCOUNT_USERNAME", + "service_account_secret": "TAP_MIXPANEL_SERVICE_ACCOUNT_SECRET", + "project_id": "TAP_MIXPANEL_SERVICE_ACCOUNT_PROJECT_ID"} else: creds = {"api_secret": "TAP_MIXPANEL_API_SECRET"} @@ -132,6 +137,10 @@ def get_credentials(self): credentials_dict = {} if self.eu_residency: creds = {"api_secret": "TAP_MIXPANEL_EU_RESIDENCY_API_SECRET"} + elif self.service_account_authentication: + creds = {"service_account_username": "TAP_MIXPANEL_SERVICE_ACCOUNT_USERNAME", + "service_account_secret": "TAP_MIXPANEL_SERVICE_ACCOUNT_SECRET", + "project_id": "TAP_MIXPANEL_SERVICE_ACCOUNT_PROJECT_ID"} else: creds = {"api_secret": "TAP_MIXPANEL_API_SECRET"} diff --git a/tests/tap_tester/test_mixpanel_all_fields_pagination.py b/tests/tap_tester/test_mixpanel_all_fields_pagination.py index c4e8034..fb54f26 100644 --- a/tests/tap_tester/test_mixpanel_all_fields_pagination.py +++ b/tests/tap_tester/test_mixpanel_all_fields_pagination.py @@ -149,6 +149,11 @@ def test_run(self): self.eu_residency = False self.pagination_test_run() + # Pagination test with service account credentials + self.service_account_authentication = True + self.pagination_test_run() + self.service_account_authentication = False + # Pagination test for EU residency server self.eu_residency = True self.pagination_test_run() diff --git a/tests/unittests/test_error_handling.py b/tests/unittests/test_error_handling.py index d4fc94e..a23a0e8 100644 --- a/tests/unittests/test_error_handling.py +++ b/tests/unittests/test_error_handling.py @@ -121,6 +121,9 @@ def test_perform_request_exception_handling( mock_request.return_value = mock_response mock_client = client.MixpanelClient( api_secret="mock_api_secret", + service_account_username="mock_service_account_username", + service_account_secret="service_account_secret", + project_id="project_id", api_domain="mock_api_domain", request_timeout=REQUEST_TIMEOUT, ) @@ -137,7 +140,6 @@ def test_perform_request_exception_handling( ["400 different timezone error", 400, mock_400_different_timezone(), client.MixpanelBadRequestError, "A validation exception has occurred. 
Please validate the timezone with the MixPanel UI under project settings."], ["400 timeout error", 400, MockResponse(400, text=timeout_400_error), client.MixpanelBadRequestError, "Timeout Error.(Please verify your credentials.)"], ["401 error", 401, MockResponse(401), client.MixpanelUnauthorizedError, "Invalid authorization credentials."], - ["403 error", 403, MockResponse(403), client.MixpanelForbiddenError, "User does not have permission to access the resource."], ["404 error", 404, MockResponse(404), client.MixpanelNotFoundError, "The resource you have specified cannot be found."], ["404 error", 404, mock_send_error(), client.MixpanelNotFoundError, "Resource not found error message from API response field 'error'."], ["404 error", 404, mock_send_message(), client.MixpanelNotFoundError, "Resource not found error message from API response field 'message'."], @@ -155,6 +157,9 @@ def test_check_access_exception_handling( mock_request.return_value = mock_response mock_client = client.MixpanelClient( api_secret="mock_api_secret", + service_account_username="mock_service_account_username", + service_account_secret="service_account_secret", + project_id="project_id", api_domain="mock_api_domain", request_timeout=REQUEST_TIMEOUT, ) @@ -180,6 +185,9 @@ def test_request_with_handling_for_5xx_exception_handling( mock_request.return_value = mock_response mock_client = client.MixpanelClient( api_secret="mock_api_secret", + service_account_username="mock_service_account_username", + service_account_secret="service_account_secret", + project_id="project_id", api_domain="mock_api_domain", request_timeout=REQUEST_TIMEOUT, ) @@ -194,6 +202,9 @@ def test_check_access_handle_timeout_error(self, mock_request, mock_sleep): """ mock_client = client.MixpanelClient( api_secret="mock_api_secret", + service_account_username="mock_service_account_username", + service_account_secret="service_account_secret", + project_id="project_id", api_domain="mock_api_domain", request_timeout=REQUEST_TIMEOUT, ) diff --git a/tests/unittests/test_request_timeout_param_value.py b/tests/unittests/test_request_timeout_param_value.py index e9ca91b..ef1dfd7 100644 --- a/tests/unittests/test_request_timeout_param_value.py +++ b/tests/unittests/test_request_timeout_param_value.py @@ -75,6 +75,7 @@ def test_request_timeout_for_none_param_value( "https://mixpanel.com/api/2.0/engage", allow_redirects=True, headers=HEADER, + params={}, timeout=REQUEST_TIMEOUT_DEFAULT, ) @@ -102,6 +103,7 @@ def test_request_timeout( "GET", "https://mixpanel.com/api/2.0/engage", allow_redirects=True, + params={}, headers=HEADER, timeout=expected_value, ) diff --git a/tests/unittests/test_service_account_authentication.py b/tests/unittests/test_service_account_authentication.py new file mode 100644 index 0000000..660b99d --- /dev/null +++ b/tests/unittests/test_service_account_authentication.py @@ -0,0 +1,61 @@ +import unittest +from unittest import mock +from tests.unittests.test_error_handling import MockResponse +from tap_mixpanel.client import MixpanelClient, MixpanelForbiddenError + +class TestServiceAccountAuthentication(unittest.TestCase): + """ + Test that tap do authentication with service account credentials without any error if it is provided. + """ + + @mock.patch("tap_mixpanel.client.MixpanelClient.check_access") + def test_token_creds(self, mock_check_access): + """Test authentication with token credentials(api_secret). + + Args: + mock_check_access: Mock the check_access method to test authentication. 
+ """ + with MixpanelClient("api_secret", None, None, None,"api_domain", 300) as client_: + pass + + self.assertEqual(client_.auth_header, "Basic YXBpX3NlY3JldA==") + + @mock.patch("tap_mixpanel.client.MixpanelClient.check_access") + def test_service_account_creds(self, mock_check_access): + """Test authentication with service account credentials(username, secret). + + Args: + mock_check_access: Mock the check_access method to test authentication. + """ + with MixpanelClient(None, "service_account_username", "service_account_secret", "project_id","api_domain", 300) as client_: + pass + + self.assertEqual(client_.auth_header, "Basic c2VydmljZV9hY2NvdW50X3VzZXJuYW1lOnNlcnZpY2VfYWNjb3VudF9zZWNyZXQ=") + + @mock.patch("tap_mixpanel.client.MixpanelClient.check_access") + def test_no_creds(self, mock_check_access): + """Test that tap throws an error if credentials is not provided. + + Args: + mock_check_access: Mock the check_access method to test authentication. + """ + with self.assertRaises(Exception) as e: + with MixpanelClient(None, None, None, None,"api_domain", 300) as client_: + pass + + self.assertEqual(str(e.exception), "Error: Missing api_secret or service account username/secret in tap config.json") + + @mock.patch("requests.Session.request", return_value = MockResponse(403)) + @mock.patch("tap_mixpanel.client.LOGGER.error") + def test_check_access_403_error_for_service_account_creds(self, mock_logger, mock_request): + """Test that tap handles 403 error with proper message. + + Args: + mock_logger: Mock of LOGGER to verify the logger message + mock_request: Mock Session.request to explicitly raise the forbidden(403) error. + """ + with self.assertRaises(MixpanelForbiddenError): + with MixpanelClient(None, "service_account_username", "service_account_secret", "project_id","api_domain", 300) as client_: + client_.check_access() + + mock_logger.assert_called_with('HTTP-error-code: 403, Error: User is not a member of this project: %s or this project is invalid', 'project_id') diff --git a/tests/unittests/test_support_eu_endpoints.py b/tests/unittests/test_support_eu_endpoints.py index 3438276..a83204e 100644 --- a/tests/unittests/test_support_eu_endpoints.py +++ b/tests/unittests/test_support_eu_endpoints.py @@ -98,7 +98,7 @@ def test_support_eu_endpoints_except_export( state = {} catalog = MockCatalog("revenue") - client = MixpanelClient("", "", "") + client = MixpanelClient("", "", "", "", "", "") revenue_obj = Revenue(client) revenue_obj.sync( catalog=catalog, @@ -151,7 +151,7 @@ def test_support_export_eu_endpoint( state = {} catalog = MockCatalog("export") - client = MixpanelClient("", "", "") + client = MixpanelClient("", "", "", "", "", "") export_obj = Export(client) export_obj.sync( catalog=catalog, @@ -216,6 +216,7 @@ def test_support_eu_endpoint_in_discover( "GET", "https://eu.mixpanel.com/api/2.0/engage", allow_redirects=True, + params={}, headers=header, timeout=300, ) @@ -231,5 +232,6 @@ def test_support_eu_endpoint_in_discover( "https://mixpanel.com/api/2.0/engage", allow_redirects=True, headers=header, + params={}, timeout=300, ) From fff35437813e07de12fe72c5323008e2bdc320f8 Mon Sep 17 00:00:00 2001 From: prijendev Date: Fri, 7 Oct 2022 11:51:34 +0530 Subject: [PATCH 07/22] Fixed pylint issue. 
--- .circleci/config.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 74b8317..5cd004f 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -20,7 +20,7 @@ jobs: command: | source /usr/local/share/virtualenvs/tap-mixpanel/bin/activate pip install pylint - pylint tap_mixpanel -d 'broad-except,chained-comparison,empty-docstring,fixme,invalid-name,line-too-long,missing-module-docstring,no-else-raise,no-else-return,too-few-public-methods,too-many-arguments,too-many-branches,too-many-lines,too-many-locals,ungrouped-imports,too-many-public-methods,protected-access,too-many-statements,not-an-iterable' + pylint tap_mixpanel -d 'broad-except,chained-comparison,empty-docstring,fixme,invalid-name,line-too-long,missing-module-docstring,no-else-raise,no-else-return,too-few-public-methods,too-many-arguments,too-many-branches,too-many-lines,too-many-locals,ungrouped-imports,too-many-public-methods,protected-access,too-many-statements,not-an-iterable,too-many-instance-attributes' - run: name: 'JSON Validator' command: | From 7c39ce61cd52525b0c393258d7e9fdb86aa8522a Mon Sep 17 00:00:00 2001 From: prijendev Date: Fri, 7 Oct 2022 12:03:55 +0530 Subject: [PATCH 08/22] Updated Readme. --- README.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/README.md b/README.md index b2f4d6d..d8bcf88 100644 --- a/README.md +++ b/README.md @@ -133,6 +133,9 @@ More details may be found in the [Mixpanel API Authentication](https://developer - `start_date` - the default value to use if no bookmark exists for an endpoint (rfc3339 date string) - `user_agent` (string, optional): Process and email for API logging purposes. Example: `tap-mixpanel ` - `api_secret` (string, `ABCdef123`): an API secret for each project in Mixpanel. This can be found in the Mixpanel Console, upper-right Settings (gear icon), Organization Settings > Projects and in the Access Keys section. For this tap, only the api_secret is needed (the api_key is legacy and the token is used only for uploading data). Each Mixpanel project has a different api_secret; therefore each Singer tap pipeline instance is for a single project. + - `service_account_username` (string, `username12`): Username of the service account. + - `service_account_secret` (string, `ABCdef123`): Secret of the service account. + - `project_id` (string, `10451202`): Id of the project which is connected to the provided service account. - `date_window_size` (integer, `30`): Number of days for date window looping through transactional endpoints with from_date and to_date. Default date_window_size is 30 days. Clients with large volumes of events may want to decrease this to 14, 7, or even down to 1-2 days. - `attribution_window` (integer, `5`): Latency minimum number of days to look-back to account for delays in attributing accurate results. [Default attribution window is 5 days](https://help.mixpanel.com/hc/en-us/articles/115004616486-Tracking-If-Users-Are-Offline). - `project_timezone` (string like `US/Pacific`): Time zone in which integer date times are stored. The project timezone may be found in the project settings in the Mixpanel console. [More info about timezones](https://help.mixpanel.com/hc/en-us/articles/115004547203-Manage-Timezones-for-Projects-in-Mixpanel). 
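To make the service-account settings documented in the README hunk above concrete, below is a minimal sketch — not a file from this repository, and every value is an illustrative placeholder — of what a service-account `config.json` could contain and of the Basic Authorization header the client derives from it, mirroring the `MixpanelClient.__enter__` logic added earlier in this series (PATCH 06).

```python
import base64
import json

# Illustrative service-account tap config; key names follow the README above,
# all values are placeholders (substitute your own project's credentials).
config = {
    "project_timezone": "US/Pacific",
    "attribution_window": 5,
    "start_date": "2020-01-01T00:00:00Z",
    "user_agent": "tap-mixpanel <user@example.com>",
    "service_account_username": "username12",
    "service_account_secret": "ABCdef123",
    "project_id": "10451202",
    "date_window_size": 30,
}

# This is what the config.json file itself would look like.
print(json.dumps(config, indent=2))

# The client joins username and secret with ':' and base64-encodes the result
# for the Basic Authorization header (the api_secret flow encodes the secret alone).
service_account_auth = (
    f"{config['service_account_username']}:{config['service_account_secret']}"
)
auth_header = (
    "Basic "
    + str(base64.urlsafe_b64encode(service_account_auth.encode("utf-8")), "utf-8")
)
print(auth_header)  # -> "Basic <base64 of username:secret>"
```

Later commits in this series add an optional `auth_type` key (`saa` or `api_secret`, defaulting to `api_secret`); with only the changes up to this point, the authentication flow is chosen purely by which credentials are present in the config.
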
From cd74b6a81376712f7fb48bebada18777e657dad7 Mon Sep 17 00:00:00 2001 From: kethan1122 Date: Thu, 15 Jun 2023 10:15:18 +0530 Subject: [PATCH 09/22] fix pylint --- .circleci/config.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index e45b7bd..38d3008 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -23,7 +23,7 @@ jobs: source /usr/local/share/virtualenvs/tap-mixpanel/bin/activate source dev_env.sh pip install pylint - pylint tap_mixpanel -d "$PYLINT_DISABLE_LIST,too-many-statements,protected-access,redefined-builtin" + pylint tap_mixpanel -d "$PYLINT_DISABLE_LIST,too-many-statements,protected-access,redefined-builtin,too-many-instance-attributes" - run: name: 'JSON Validator' command: | From 52b02dd023e2d41a704a8f09ccdfb38eabea6c1f Mon Sep 17 00:00:00 2001 From: kethan1122 Date: Thu, 15 Jun 2023 10:20:01 +0530 Subject: [PATCH 10/22] generate catalog if not configured --- tap_mixpanel/__init__.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/tap_mixpanel/__init__.py b/tap_mixpanel/__init__.py index e455fbf..1c66e6c 100644 --- a/tap_mixpanel/__init__.py +++ b/tap_mixpanel/__init__.py @@ -89,11 +89,14 @@ def main(): if parsed_args.discover: do_discover(client, properties_flag) - elif parsed_args.catalog: + else: + catalog = parsed_args.catalog + if not catalog: + catalog = _discover(client, properties_flag) _sync( client=client, config=config, - catalog=parsed_args.catalog, + catalog=catalog, state=state, start_date=start_date, ) From 76fc739b29f41c49e9e56ed99f593fe9c63c85f5 Mon Sep 17 00:00:00 2001 From: kethan1122 Date: Thu, 15 Jun 2023 10:47:05 +0530 Subject: [PATCH 11/22] keep api_domain as attribute for client --- tap_mixpanel/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tap_mixpanel/__init__.py b/tap_mixpanel/__init__.py index 1c66e6c..a6fdfad 100644 --- a/tap_mixpanel/__init__.py +++ b/tap_mixpanel/__init__.py @@ -85,6 +85,7 @@ def main(): state = parsed_args.state config = parsed_args.config + client.__api_domain = api_domain properties_flag = config.get("select_properties_by_default") if parsed_args.discover: From 20c3dd9add0d08fcb8edba14243559be3976090d Mon Sep 17 00:00:00 2001 From: kethan1122 Date: Thu, 15 Jun 2023 11:12:43 +0530 Subject: [PATCH 12/22] remove redundant code in unittests --- tests/unittests/test_transform_event_times.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/tests/unittests/test_transform_event_times.py b/tests/unittests/test_transform_event_times.py index 10531c7..aeef637 100644 --- a/tests/unittests/test_transform_event_times.py +++ b/tests/unittests/test_transform_event_times.py @@ -4,10 +4,6 @@ from datetime import datetime from tap_mixpanel.transform import transform_event_times -import pytz - -from tap_mixpanel.transform import transform_event_times - UTC = pytz.utc From 6629ef2d30faba5863c36b65c7d861aabbf7d469 Mon Sep 17 00:00:00 2001 From: kethan1122 Date: Thu, 15 Jun 2023 12:18:59 +0530 Subject: [PATCH 13/22] add auth_type for auth configuration --- tap_mixpanel/__init__.py | 5 +++++ tap_mixpanel/client.py | 5 +++-- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/tap_mixpanel/__init__.py b/tap_mixpanel/__init__.py index a6fdfad..e2e1c91 100644 --- a/tap_mixpanel/__init__.py +++ b/tap_mixpanel/__init__.py @@ -70,6 +70,10 @@ def main(): else: api_domain = "mixpanel.com" + auth_type = parsed_args.config.get("auth_type") + if not auth_type: + auth_type = "project_secret" + with MixpanelClient( 
parsed_args.config.get("api_secret"), parsed_args.config.get("service_account_username"), @@ -78,6 +82,7 @@ def main(): api_domain, request_timeout, parsed_args.config["user_agent"], + auth_type ) as client: state = {} diff --git a/tap_mixpanel/client.py b/tap_mixpanel/client.py index 5a9cc03..1e1e837 100644 --- a/tap_mixpanel/client.py +++ b/tap_mixpanel/client.py @@ -136,7 +136,7 @@ class MixpanelClient: The client class used for making REST calls to the Mixpanel API. """ def __init__(self, api_secret, service_account_username, service_account_secret, project_id, api_domain, - request_timeout, user_agent=None): + request_timeout, user_agent=None, auth_type='project_secret'): self.__api_secret = api_secret self.__service_account_username = service_account_username self.__service_account_secret = service_account_secret @@ -145,6 +145,7 @@ def __init__(self, api_secret, service_account_username, service_account_secret, self.__request_timeout = request_timeout self.__user_agent = user_agent self.__session = requests.Session() + self.__auth_type = auth_type self.__verified = False self.auth_header = None self.disable_engage_endpoint = False @@ -153,7 +154,7 @@ def __enter__(self): """ Set auth_header with provided credentials. If credentials is not provided, then raise the exception. """ - if self.__api_secret: + if self.__auth_type == 'project_secret' and self.__api_secret: self.auth_header = f"Basic {str(base64.urlsafe_b64encode(self.__api_secret.encode('utf-8')), 'utf-8')}" elif self.__service_account_username and self.__service_account_secret: service_account_auth = f"{self.__service_account_username}:{self.__service_account_secret}" From eea048041a4e2a5fed2fba13ae86bac70f20cbb6 Mon Sep 17 00:00:00 2001 From: kethan1122 Date: Thu, 15 Jun 2023 14:46:29 +0530 Subject: [PATCH 14/22] merge conflict --- tests/tap_tester/test_mixpanel_bookmark.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/tests/tap_tester/test_mixpanel_bookmark.py b/tests/tap_tester/test_mixpanel_bookmark.py index 032409b..ffec320 100644 --- a/tests/tap_tester/test_mixpanel_bookmark.py +++ b/tests/tap_tester/test_mixpanel_bookmark.py @@ -135,11 +135,7 @@ def bookmark_test_run(self): replication_key_value, first_bookmark_value_utc, msg="First sync bookmark was set incorrectly," -<<<<<<< HEAD "a record with a greater replication-key value was synced.", -======= - " a record with a greater replication-key value was synced.", ->>>>>>> master ) for record in second_sync_messages: From 3b33c20abcb9dbb4db4985f94590116d09b77295 Mon Sep 17 00:00:00 2001 From: kethan1122 Date: Thu, 15 Jun 2023 15:08:07 +0530 Subject: [PATCH 15/22] update unittests --- tests/unittests/test_error_handling.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/unittests/test_error_handling.py b/tests/unittests/test_error_handling.py index 2d58efd..603be44 100644 --- a/tests/unittests/test_error_handling.py +++ b/tests/unittests/test_error_handling.py @@ -143,7 +143,6 @@ def test_perform_request_exception_handling( ["400 different timezone error", 400, mock_400_different_timezone(), client.MixpanelBadRequestError, "A validation exception has occurred. 
Please validate the timezone with the MixPanel UI under project settings."], ["400 timeout error", 400, MockResponse(400, text=timeout_400_error), client.MixpanelBadRequestError, "Timeout Error.(Please verify your credentials.)"], ["401 error", 401, MockResponse(401), client.MixpanelUnauthorizedError, "Invalid authorization credentials."], - # ["403 error", 403, MockResponse(403), client.MixpanelForbiddenError, "User is not a member of this project: project_id or this project is invalid"], ["404 error", 404, MockResponse(404), client.MixpanelNotFoundError, "The resource you have specified cannot be found."], ["404 error", 404, mock_send_error(), client.MixpanelNotFoundError, "Resource not found error message from API response field 'error'."], ["404 error", 404, mock_send_message(), client.MixpanelNotFoundError, "Resource not found error message from API response field 'message'."], From dd67021ac890d42bba220d12b9efeccd7ce1efb7 Mon Sep 17 00:00:00 2001 From: kethan1122 Date: Thu, 15 Jun 2023 15:10:05 +0530 Subject: [PATCH 16/22] update unittests --- tests/unittests/test_error_handling.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/unittests/test_error_handling.py b/tests/unittests/test_error_handling.py index 603be44..357414a 100644 --- a/tests/unittests/test_error_handling.py +++ b/tests/unittests/test_error_handling.py @@ -165,6 +165,7 @@ def test_check_access_exception_handling( project_id="project_id", api_domain="mock_api_domain", request_timeout=REQUEST_TIMEOUT, + auth_type="saa" ) with self.assertRaises(error) as e: mock_client.check_access() @@ -197,6 +198,7 @@ def test_request_with_handling_for_5xx_exception_handling( project_id="project_id", api_domain="mock_api_domain", request_timeout=REQUEST_TIMEOUT, + auth_type="saa" ) with self.assertRaises(error): mock_client.perform_request("GET") @@ -214,6 +216,7 @@ def test_check_access_handle_timeout_error(self, mock_request, mock_sleep): project_id="project_id", api_domain="mock_api_domain", request_timeout=REQUEST_TIMEOUT, + auth_type="saa" ) with self.assertRaises(client.ReadTimeoutError): mock_client.check_access() From 11fe4b41faa3e20affc1650ac846ee9a4291d612 Mon Sep 17 00:00:00 2001 From: kethan1122 Date: Thu, 15 Jun 2023 19:33:23 +0530 Subject: [PATCH 17/22] change project secret as API secret --- tap_mixpanel/__init__.py | 2 +- tap_mixpanel/client.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tap_mixpanel/__init__.py b/tap_mixpanel/__init__.py index e2e1c91..e353734 100644 --- a/tap_mixpanel/__init__.py +++ b/tap_mixpanel/__init__.py @@ -72,7 +72,7 @@ def main(): auth_type = parsed_args.config.get("auth_type") if not auth_type: - auth_type = "project_secret" + auth_type = "api_secret" with MixpanelClient( parsed_args.config.get("api_secret"), diff --git a/tap_mixpanel/client.py b/tap_mixpanel/client.py index 1e1e837..88f04a8 100644 --- a/tap_mixpanel/client.py +++ b/tap_mixpanel/client.py @@ -154,7 +154,7 @@ def __enter__(self): """ Set auth_header with provided credentials. If credentials is not provided, then raise the exception. 
""" - if self.__auth_type == 'project_secret' and self.__api_secret: + if self.__auth_type == 'api_secret' and self.__api_secret: self.auth_header = f"Basic {str(base64.urlsafe_b64encode(self.__api_secret.encode('utf-8')), 'utf-8')}" elif self.__service_account_username and self.__service_account_secret: service_account_auth = f"{self.__service_account_username}:{self.__service_account_secret}" From a2f49331cc0b3330dd02f9b02cc909950da6337b Mon Sep 17 00:00:00 2001 From: kethan1122 Date: Thu, 15 Jun 2023 20:37:48 +0530 Subject: [PATCH 18/22] change default value --- tap_mixpanel/client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tap_mixpanel/client.py b/tap_mixpanel/client.py index 88f04a8..c747f4a 100644 --- a/tap_mixpanel/client.py +++ b/tap_mixpanel/client.py @@ -136,7 +136,7 @@ class MixpanelClient: The client class used for making REST calls to the Mixpanel API. """ def __init__(self, api_secret, service_account_username, service_account_secret, project_id, api_domain, - request_timeout, user_agent=None, auth_type='project_secret'): + request_timeout, user_agent=None, auth_type='api_secret'): self.__api_secret = api_secret self.__service_account_username = service_account_username self.__service_account_secret = service_account_secret From 7c8a2264116875e41a0312df8203c9c5dbbe4874 Mon Sep 17 00:00:00 2001 From: VishalP <20889199+Vi6hal@users.noreply.github.com> Date: Tue, 4 Jul 2023 23:57:06 +0530 Subject: [PATCH 19/22] added config error and minor enhancement --- tap_mixpanel/__init__.py | 5 +++-- tap_mixpanel/client.py | 35 +++++++++++++---------------------- 2 files changed, 16 insertions(+), 24 deletions(-) diff --git a/tap_mixpanel/__init__.py b/tap_mixpanel/__init__.py index e353734..9054d47 100644 --- a/tap_mixpanel/__init__.py +++ b/tap_mixpanel/__init__.py @@ -70,8 +70,9 @@ def main(): else: api_domain = "mixpanel.com" - auth_type = parsed_args.config.get("auth_type") - if not auth_type: + auth_type = parsed_args.config.get("auth_type","").lower() + # default to api_secret as authentication_type + if auth_type not in ("saa","api_secret"): auth_type = "api_secret" with MixpanelClient( diff --git a/tap_mixpanel/client.py b/tap_mixpanel/client.py index c747f4a..15f041b 100644 --- a/tap_mixpanel/client.py +++ b/tap_mixpanel/client.py @@ -17,6 +17,8 @@ class ReadTimeoutError(Exception): """Custom error for request timeout.""" +class ConfigurationError(Exception): + """Custom error for incorrect configuration""" class Server5xxError(Exception): """Custom error class for all the 5xx error.""" @@ -156,12 +158,12 @@ def __enter__(self): """ if self.__auth_type == 'api_secret' and self.__api_secret: self.auth_header = f"Basic {str(base64.urlsafe_b64encode(self.__api_secret.encode('utf-8')), 'utf-8')}" - elif self.__service_account_username and self.__service_account_secret: + elif self.__auth_type == 'saa' and self.__service_account_username and self.__service_account_secret: service_account_auth = f"{self.__service_account_username}:{self.__service_account_secret}" self.auth_header = f"Basic {str(base64.urlsafe_b64encode(service_account_auth.encode('utf-8')), 'utf-8')}" else: - raise Exception("Error: Missing api_secret or service account username/secret in tap config.json") - + raise ConfigurationError("Error: Missing api_secret or service account username/secret in tap config.json") + print(self.__auth_type) self.__verified = self.check_access() return self @@ -192,9 +194,7 @@ def check_access(self): if self.__user_agent: headers["User-Agent"] = 
self.__user_agent headers["Accept"] = "application/json" - headers[ - "Authorization" - ] = self.auth_header + headers["Authorization"] = self.auth_header if self.__project_id: params["project_id"] = self.__project_id @@ -321,9 +321,7 @@ def request(self, method, url=None, path=None, params=None, json=None, **kwargs) else: params = f"{params}&project_id={self.__project_id}" - kwargs["headers"][ - "Authorization" - ] = self.auth_header + kwargs["headers"]["Authorization"] = self.auth_header with metrics.http_request_timer(endpoint) as timer: response = self.perform_request( method=method, url=url, params=params, json=json, **kwargs @@ -349,19 +347,15 @@ def request_export( Yields: dict: Records of export stream. """ - if not self.__verified: - self.__verified = self.check_access() + + self.__verified = self.__verified if self.__verified else self.check_access() if url and path: url = f"{url}/{path}" elif path and not url: url = f"https://{self.__api_domain}/api/2.0/{path}" - - if "endpoint" in kwargs: - endpoint = kwargs["endpoint"] - del kwargs["endpoint"] - else: - endpoint = "export" + + endpoint = kwargs.pop("endpoint","export") if self.__project_id: if isinstance(params, dict): @@ -369,8 +363,7 @@ def request_export( else: params = f"{params}&project_id={self.__project_id}" - if "headers" not in kwargs: - kwargs["headers"] = {} + kwargs["headers"] = kwargs.get("headers",{}) kwargs["headers"]["Accept"] = "application/json" @@ -380,9 +373,7 @@ def request_export( if method == "POST": kwargs["headers"]["Content-Type"] = "application/json" - kwargs["headers"][ - "Authorization" - ] = self.auth_header + kwargs["headers"]["Authorization"] = self.auth_header with metrics.http_request_timer(endpoint) as timer: response = self.perform_request( method=method, url=url, params=params, json=json, stream=True, **kwargs From de0239c35f31682738039a9c316fe7264a2b349f Mon Sep 17 00:00:00 2001 From: VishalP <20889199+Vi6hal@users.noreply.github.com> Date: Wed, 5 Jul 2023 00:04:40 +0530 Subject: [PATCH 20/22] fixed pylint --- README.md | 1 + tap_mixpanel/client.py | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 1d10b8c..2f623d5 100644 --- a/README.md +++ b/README.md @@ -134,6 +134,7 @@ More details may be found in the [Mixpanel API Authentication](https://developer 3. Create your tap's `config.json` file. The tap config file for this tap should include these entries: - `start_date` - the default value to use if no bookmark exists for an endpoint (rfc3339 date string) - `user_agent` (string, optional): Process and email for API logging purposes. Example: `tap-mixpanel ` + - `auth_type` (`saa` or `api_secret`): Used to toggle between [service account authentication](https://developer.mixpanel.com/reference/service-accounts) and [api secret based authentication](https://docs.mixpanel.com/docs/tracking/how-tos/api-credentials#api-secret), it is recommended by mixpanel to use service account authentication - `api_secret` (string, `ABCdef123`): an API secret for each project in Mixpanel. This can be found in the Mixpanel Console, upper-right Settings (gear icon), Organization Settings > Projects and in the Access Keys section. For this tap, only the api_secret is needed (the api_key is legacy and the token is used only for uploading data). Each Mixpanel project has a different api_secret; therefore each Singer tap pipeline instance is for a single project. - `service_account_username` (string, `username12`): Username of the service account. 
- `service_account_secret` (string, `ABCdef123`): Secret of the service account. diff --git a/tap_mixpanel/client.py b/tap_mixpanel/client.py index 15f041b..e2e6bfd 100644 --- a/tap_mixpanel/client.py +++ b/tap_mixpanel/client.py @@ -163,7 +163,7 @@ def __enter__(self): self.auth_header = f"Basic {str(base64.urlsafe_b64encode(service_account_auth.encode('utf-8')), 'utf-8')}" else: raise ConfigurationError("Error: Missing api_secret or service account username/secret in tap config.json") - print(self.__auth_type) + self.__verified = self.check_access() return self @@ -354,7 +354,7 @@ def request_export( url = f"{url}/{path}" elif path and not url: url = f"https://{self.__api_domain}/api/2.0/{path}" - + endpoint = kwargs.pop("endpoint","export") if self.__project_id: From f2098aad8a68670dc14f2ddd69fe24f80aaef8a8 Mon Sep 17 00:00:00 2001 From: VishalP <20889199+Vi6hal@users.noreply.github.com> Date: Wed, 5 Jul 2023 00:26:55 +0530 Subject: [PATCH 21/22] fixed ut --- tests/unittests/test_service_account_authentication.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/unittests/test_service_account_authentication.py b/tests/unittests/test_service_account_authentication.py index 660b99d..c75189d 100644 --- a/tests/unittests/test_service_account_authentication.py +++ b/tests/unittests/test_service_account_authentication.py @@ -27,7 +27,7 @@ def test_service_account_creds(self, mock_check_access): Args: mock_check_access: Mock the check_access method to test authentication. """ - with MixpanelClient(None, "service_account_username", "service_account_secret", "project_id","api_domain", 300) as client_: + with MixpanelClient(None, "service_account_username", "service_account_secret", "project_id","api_domain", 300, auth_type="saa") as client_: pass self.assertEqual(client_.auth_header, "Basic c2VydmljZV9hY2NvdW50X3VzZXJuYW1lOnNlcnZpY2VfYWNjb3VudF9zZWNyZXQ=") @@ -55,7 +55,7 @@ def test_check_access_403_error_for_service_account_creds(self, mock_logger, moc mock_request: Mock Session.request to explicitly raise the forbidden(403) error. 
""" with self.assertRaises(MixpanelForbiddenError): - with MixpanelClient(None, "service_account_username", "service_account_secret", "project_id","api_domain", 300) as client_: + with MixpanelClient(None, "service_account_username", "service_account_secret", "project_id","api_domain", 300, auth_type="saa") as client_: client_.check_access() mock_logger.assert_called_with('HTTP-error-code: 403, Error: User is not a member of this project: %s or this project is invalid', 'project_id') From 159e9ace70bf7d963e80b6c1128b59e47b43a4b1 Mon Sep 17 00:00:00 2001 From: Vi6hal <20889199+Vi6hal@users.noreply.github.com> Date: Wed, 5 Jul 2023 02:12:24 +0000 Subject: [PATCH 22/22] updated tests to run in ssa mode --- tests/tap_tester/test_mixpanel_all_fields_pagination.py | 5 +++++ tests/tap_tester/test_mixpanel_automatic_fields.py | 5 +++++ tests/tap_tester/test_mixpanel_bookmark.py | 5 +++++ tests/tap_tester/test_mixpanel_discovery.py | 5 +++++ tests/tap_tester/test_mixpanel_start_date.py | 6 ++++++ 5 files changed, 26 insertions(+) diff --git a/tests/tap_tester/test_mixpanel_all_fields_pagination.py b/tests/tap_tester/test_mixpanel_all_fields_pagination.py index 42587b0..229a669 100644 --- a/tests/tap_tester/test_mixpanel_all_fields_pagination.py +++ b/tests/tap_tester/test_mixpanel_all_fields_pagination.py @@ -152,3 +152,8 @@ def test_run(self): # Pagination test for EU residency server self.eu_residency = True self.pagination_test_run() + + def test_run_ssa(self): + # perform checks with service account auth + self.service_account_authentication = True + self.pagination_test_run() \ No newline at end of file diff --git a/tests/tap_tester/test_mixpanel_automatic_fields.py b/tests/tap_tester/test_mixpanel_automatic_fields.py index 72d0f2b..18922d8 100644 --- a/tests/tap_tester/test_mixpanel_automatic_fields.py +++ b/tests/tap_tester/test_mixpanel_automatic_fields.py @@ -65,3 +65,8 @@ def test_standard_auto_fields(self): """Automatic fields test for standard server""" self.eu_residency = False self.automatic_fields_test_run() + + def test_run_ssa(self): + # perform checks with service account auth + self.service_account_authentication = True + self.automatic_fields_test_run() \ No newline at end of file diff --git a/tests/tap_tester/test_mixpanel_bookmark.py b/tests/tap_tester/test_mixpanel_bookmark.py index ffec320..2274ba3 100644 --- a/tests/tap_tester/test_mixpanel_bookmark.py +++ b/tests/tap_tester/test_mixpanel_bookmark.py @@ -189,3 +189,8 @@ def test_standard_bookmarks(self): """Bookmark test for standard server.""" self.eu_residency = False self.bookmark_test_run() + + def test_run_ssa(self): + # perform checks with ssa auth + self.service_account_authentication = True + self.bookmark_test_run() \ No newline at end of file diff --git a/tests/tap_tester/test_mixpanel_discovery.py b/tests/tap_tester/test_mixpanel_discovery.py index b578dac..43c73e9 100644 --- a/tests/tap_tester/test_mixpanel_discovery.py +++ b/tests/tap_tester/test_mixpanel_discovery.py @@ -188,3 +188,8 @@ def test_eu_discovery(self): """Discovery test for EU residency server.""" self.eu_residency = True self.discovery_test_run() + + def test_run_ssa(self): + # perform checks with ssa auth + self.service_account_authentication = True + self.discovery_test_run() \ No newline at end of file diff --git a/tests/tap_tester/test_mixpanel_start_date.py b/tests/tap_tester/test_mixpanel_start_date.py index ea819ce..e12ccd7 100644 --- a/tests/tap_tester/test_mixpanel_start_date.py +++ b/tests/tap_tester/test_mixpanel_start_date.py @@ 
-174,3 +174,9 @@ def test_run(self): # Start date test for standard server self.eu_residency = False self.start_date_test_run() + + def test_run_ssa(self): + # perform checks with ssa auth + self.service_account_authentication = True + self.start_date_test_run() +
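
The patches above leave the client with two authentication modes selected by the new `auth_type` config key: the legacy `api_secret` mode (the fallback whenever the key is missing or unrecognised) and service-account mode (`saa`). Below is a minimal sketch of how the `MixpanelClient` constructor is exercised in each mode, based only on the signature and `__enter__` logic shown in this series; the credential values, project id, and timeout are placeholders, not real settings:

    # Illustrative placeholders only -- none of these credential values are real.
    from tap_mixpanel.client import MixpanelClient

    REQUEST_TIMEOUT = 300

    # Legacy mode: auth_type defaults to "api_secret" when the config value
    # is missing or not one of ("saa", "api_secret").
    secret_client = MixpanelClient(
        "ABCdef123",        # api_secret
        None,               # service_account_username (unused in this mode)
        None,               # service_account_secret (unused in this mode)
        None,               # project_id
        "mixpanel.com",     # api_domain ("eu.mixpanel.com" for EU residency)
        REQUEST_TIMEOUT,
        auth_type="api_secret",
    )

    # Service-account mode ("saa"), which the README change notes Mixpanel
    # recommends; project_id is passed so requests are scoped to the project.
    saa_client = MixpanelClient(
        None,
        "service_account_username",
        "service_account_secret",
        "project_id",
        "mixpanel.com",
        REQUEST_TIMEOUT,
        auth_type="saa",
    )

    # Entering the context manager builds the Basic auth header for the chosen
    # mode and verifies it via check_access(); missing credentials raise the
    # new ConfigurationError. Commented out here to avoid a live API call.
    # with saa_client as client_:
    #     pass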