diff --git a/.circleci/config.yml b/.circleci/config.yml index e45b7bd..38d3008 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -23,7 +23,7 @@ jobs: source /usr/local/share/virtualenvs/tap-mixpanel/bin/activate source dev_env.sh pip install pylint - pylint tap_mixpanel -d "$PYLINT_DISABLE_LIST,too-many-statements,protected-access,redefined-builtin" + pylint tap_mixpanel -d "$PYLINT_DISABLE_LIST,too-many-statements,protected-access,redefined-builtin,too-many-instance-attributes" - run: name: 'JSON Validator' command: | diff --git a/README.md b/README.md index e863a3b..2f623d5 100644 --- a/README.md +++ b/README.md @@ -134,7 +134,11 @@ More details may be found in the [Mixpanel API Authentication](https://developer 3. Create your tap's `config.json` file. The tap config file for this tap should include these entries: - `start_date` - the default value to use if no bookmark exists for an endpoint (rfc3339 date string) - `user_agent` (string, optional): Process and email for API logging purposes. Example: `tap-mixpanel ` + - `auth_type` (`saa` or `api_secret`): Selects between [service account authentication](https://developer.mixpanel.com/reference/service-accounts) and [API secret based authentication](https://docs.mixpanel.com/docs/tracking/how-tos/api-credentials#api-secret). Mixpanel recommends service account authentication. Defaults to `api_secret` when omitted or set to an unrecognized value. - `api_secret` (string, `ABCdef123`): an API secret for each project in Mixpanel. This can be found in the Mixpanel Console, upper-right Settings (gear icon), Organization Settings > Projects and in the Access Keys section. For this tap, only the api_secret is needed (the api_key is legacy and the token is used only for uploading data). Each Mixpanel project has a different api_secret; therefore each Singer tap pipeline instance is for a single project. + - `service_account_username` (string, `username12`): Username of the service account. 
+ - `service_account_secret` (string, `ABCdef123`): Secret of the service account. Required, together with `service_account_username`, when `auth_type` is `saa`. + - `project_id` (string, `10451202`): ID of the Mixpanel project that the provided service account is connected to. - `date_window_size` (integer, `30`): Number of days for date window looping through transactional endpoints with from_date and to_date. Default date_window_size is 30 days. Clients with large volumes of events may want to decrease this to 14, 7, or even down to 1-2 days. - `attribution_window` (integer, `5`): Latency minimum number of days to look-back to account for delays in attributing accurate results. [Default attribution window is 5 days](https://help.mixpanel.com/hc/en-us/articles/115004616486-Tracking-If-Users-Are-Offline). - `project_timezone` (string like `US/Pacific`): Time zone in which integer date times are stored. The project timezone may be found in the project settings in the Mixpanel console. [More info about timezones](https://help.mixpanel.com/hc/en-us/articles/115004547203-Manage-Timezones-for-Projects-in-Mixpanel). 
diff --git a/tap_mixpanel/__init__.py b/tap_mixpanel/__init__.py index 3745a8a..9054d47 100644 --- a/tap_mixpanel/__init__.py +++ b/tap_mixpanel/__init__.py @@ -17,7 +17,6 @@ REQUEST_TIMEOUT = 300 REQUIRED_CONFIG_KEYS = [ "project_timezone", - "api_secret", "attribution_window", "start_date", "user_agent", @@ -71,11 +70,20 @@ def main(): else: api_domain = "mixpanel.com" + auth_type = parsed_args.config.get("auth_type","").lower() + # default to api_secret as authentication_type + if auth_type not in ("saa","api_secret"): + auth_type = "api_secret" + with MixpanelClient( - parsed_args.config["api_secret"], + parsed_args.config.get("api_secret"), + parsed_args.config.get("service_account_username"), + parsed_args.config.get("service_account_secret"), + parsed_args.config.get("project_id"), api_domain, request_timeout, parsed_args.config["user_agent"], + auth_type ) as client: state = {} diff --git a/tap_mixpanel/client.py b/tap_mixpanel/client.py index 4e49a5d..c0bf54e 100644 --- a/tap_mixpanel/client.py +++ b/tap_mixpanel/client.py @@ -17,6 +17,8 @@ class ReadTimeoutError(Exception): """Custom error for request timeout.""" +class ConfigurationError(Exception): + """Custom error for incorrect configuration""" class Server5xxError(Exception): """Custom error class for all the 5xx error.""" @@ -135,16 +137,33 @@ class MixpanelClient: """ The client class used for making REST calls to the Mixpanel API. 
""" - def __init__(self, api_secret, api_domain, request_timeout, user_agent=None): + def __init__(self, api_secret, service_account_username, service_account_secret, project_id, api_domain, + request_timeout, user_agent=None, auth_type='api_secret'): self.__api_secret = api_secret + self.__service_account_username = service_account_username + self.__service_account_secret = service_account_secret + self.__project_id = project_id self.__api_domain = api_domain self.__request_timeout = request_timeout self.__user_agent = user_agent self.__session = requests.Session() + self.__auth_type = auth_type self.__verified = False + self.auth_header = None self.disable_engage_endpoint = False def __enter__(self): + """ + Set auth_header with provided credentials. If credentials is not provided, then raise the exception. + """ + if self.__auth_type == 'api_secret' and self.__api_secret: + self.auth_header = f"Basic {str(base64.urlsafe_b64encode(self.__api_secret.encode('utf-8')), 'utf-8')}" + elif self.__auth_type == 'saa' and self.__service_account_username and self.__service_account_secret: + service_account_auth = f"{self.__service_account_username}:{self.__service_account_secret}" + self.auth_header = f"Basic {str(base64.urlsafe_b64encode(service_account_auth.encode('utf-8')), 'utf-8')}" + else: + raise ConfigurationError("Error: Missing api_secret or service account username/secret in tap config.json") + self.__verified = self.check_access() return self @@ -168,24 +187,31 @@ def check_access(self): bool: Returns true if credentials are verified. 
(else raises Exception) """ - if self.__api_secret is None: - raise Exception("Error: Missing api_secret in tap config.json.") headers = {} + params = {} # Endpoint: simple API call to return a single record (org settings) to test access url = f"https://{self.__api_domain}/api/2.0/engage" if self.__user_agent: headers["User-Agent"] = self.__user_agent headers["Accept"] = "application/json" - headers[ - "Authorization" - ] = f"Basic {str(base64.urlsafe_b64encode(self.__api_secret.encode('utf-8')), 'utf-8')}" + headers["Authorization"] = self.auth_header + if self.__project_id: + params["project_id"] = self.__project_id try: response = self.__session.get( url=url, + params=params, timeout=self.__request_timeout, # Request timeout parameter headers=headers, ) + + if response.status_code == 403: + LOGGER.error( + "HTTP-error-code: 403, Error: User is not a member of this project: %s or this project is invalid", + self.__project_id) + raise MixpanelForbiddenError from None + except requests.exceptions.Timeout as err: LOGGER.error("TIMEOUT ERROR: %s", str(err)) raise ReadTimeoutError from None @@ -289,9 +315,13 @@ def request(self, method, url=None, path=None, params=None, json=None, **kwargs) if method == "POST": kwargs["headers"]["Content-Type"] = "application/json" - kwargs["headers"][ - "Authorization" - ] = f"Basic {str(base64.urlsafe_b64encode(self.__api_secret.encode('utf-8')), 'utf-8')}" + if self.__project_id: + if isinstance(params, dict): + params['project_id'] = self.__project_id + else: + params = f"{params}&project_id={self.__project_id}" + + kwargs["headers"]["Authorization"] = self.auth_header with metrics.http_request_timer(endpoint) as timer: response = self.perform_request( method=method, url=url, params=params, json=json, **kwargs @@ -317,22 +347,23 @@ def request_export( Yields: dict: Records of export stream. 
""" - if not self.__verified: - self.__verified = self.check_access() + + self.__verified = self.__verified if self.__verified else self.check_access() if url and path: url = f"{url}/{path}" elif path and not url: url = f"https://{self.__api_domain}/api/2.0/{path}" - if "endpoint" in kwargs: - endpoint = kwargs["endpoint"] - del kwargs["endpoint"] - else: - endpoint = "export" + endpoint = kwargs.pop("endpoint","export") - if "headers" not in kwargs: - kwargs["headers"] = {} + if self.__project_id: + if isinstance(params, dict): + params['project_id'] = self.__project_id + else: + params = f"{params}&project_id={self.__project_id}" + + kwargs["headers"] = kwargs.get("headers",{}) kwargs["headers"]["Accept"] = "application/json" @@ -342,9 +373,7 @@ def request_export( if method == "POST": kwargs["headers"]["Content-Type"] = "application/json" - kwargs["headers"][ - "Authorization" - ] = f"Basic {str(base64.urlsafe_b64encode(self.__api_secret.encode('utf-8')), 'utf-8')}" + kwargs["headers"]["Authorization"] = self.auth_header with metrics.http_request_timer(endpoint) as timer: response = self.perform_request( method=method, url=url, params=params, json=json, stream=True, **kwargs diff --git a/tests/tap_tester/base.py b/tests/tap_tester/base.py index 2f868e0..fee0472 100644 --- a/tests/tap_tester/base.py +++ b/tests/tap_tester/base.py @@ -29,6 +29,7 @@ class TestMixPanelBase(BaseCase): start_date = "" end_date = "" eu_residency = True + service_account_authentication = False export_events = os.getenv("TAP_MIXPANEL_EXPORT_EVENTS") def tap_name(self): @@ -82,6 +83,10 @@ def setUp(self): missing_envs = [] if self.eu_residency: creds = {"api_secret": "TAP_MIXPANEL_EU_RESIDENCY_API_SECRET"} + elif self.service_account_authentication: + creds = {"service_account_username": "TAP_MIXPANEL_SERVICE_ACCOUNT_USERNAME", + "service_account_secret": "TAP_MIXPANEL_SERVICE_ACCOUNT_SECRET", + "project_id": "TAP_MIXPANEL_SERVICE_ACCOUNT_PROJECT_ID"} else: creds = {"api_secret": 
"TAP_MIXPANEL_API_SECRET"} @@ -138,6 +143,10 @@ def get_credentials(self): credentials_dict = {} if self.eu_residency: creds = {"api_secret": "TAP_MIXPANEL_EU_RESIDENCY_API_SECRET"} + elif self.service_account_authentication: + creds = {"service_account_username": "TAP_MIXPANEL_SERVICE_ACCOUNT_USERNAME", + "service_account_secret": "TAP_MIXPANEL_SERVICE_ACCOUNT_SECRET", + "project_id": "TAP_MIXPANEL_SERVICE_ACCOUNT_PROJECT_ID"} else: creds = {"api_secret": "TAP_MIXPANEL_API_SECRET"} diff --git a/tests/tap_tester/test_mixpanel_all_fields_pagination.py b/tests/tap_tester/test_mixpanel_all_fields_pagination.py index 42587b0..229a669 100644 --- a/tests/tap_tester/test_mixpanel_all_fields_pagination.py +++ b/tests/tap_tester/test_mixpanel_all_fields_pagination.py @@ -152,3 +152,8 @@ def test_run(self): # Pagination test for EU residency server self.eu_residency = True self.pagination_test_run() + + def test_run_ssa(self): + # perform checks with service account auth + self.service_account_authentication = True + self.pagination_test_run() \ No newline at end of file diff --git a/tests/tap_tester/test_mixpanel_automatic_fields.py b/tests/tap_tester/test_mixpanel_automatic_fields.py index 72d0f2b..18922d8 100644 --- a/tests/tap_tester/test_mixpanel_automatic_fields.py +++ b/tests/tap_tester/test_mixpanel_automatic_fields.py @@ -65,3 +65,8 @@ def test_standard_auto_fields(self): """Automatic fields test for standard server""" self.eu_residency = False self.automatic_fields_test_run() + + def test_run_ssa(self): + # perform checks with service account auth + self.service_account_authentication = True + self.automatic_fields_test_run() \ No newline at end of file diff --git a/tests/tap_tester/test_mixpanel_bookmark.py b/tests/tap_tester/test_mixpanel_bookmark.py index fbc3e9a..2274ba3 100644 --- a/tests/tap_tester/test_mixpanel_bookmark.py +++ b/tests/tap_tester/test_mixpanel_bookmark.py @@ -135,7 +135,7 @@ def bookmark_test_run(self): replication_key_value, 
first_bookmark_value_utc, msg="First sync bookmark was set incorrectly," - " a record with a greater replication-key value was synced.", + "a record with a greater replication-key value was synced.", ) for record in second_sync_messages: @@ -189,3 +189,8 @@ def test_standard_bookmarks(self): """Bookmark test for standard server.""" self.eu_residency = False self.bookmark_test_run() + + def test_run_ssa(self): + # perform checks with ssa auth + self.service_account_authentication = True + self.bookmark_test_run() \ No newline at end of file diff --git a/tests/tap_tester/test_mixpanel_discovery.py b/tests/tap_tester/test_mixpanel_discovery.py index b578dac..43c73e9 100644 --- a/tests/tap_tester/test_mixpanel_discovery.py +++ b/tests/tap_tester/test_mixpanel_discovery.py @@ -188,3 +188,8 @@ def test_eu_discovery(self): """Discovery test for EU residency server.""" self.eu_residency = True self.discovery_test_run() + + def test_run_ssa(self): + # perform checks with ssa auth + self.service_account_authentication = True + self.discovery_test_run() \ No newline at end of file diff --git a/tests/tap_tester/test_mixpanel_start_date.py b/tests/tap_tester/test_mixpanel_start_date.py index ea819ce..e12ccd7 100644 --- a/tests/tap_tester/test_mixpanel_start_date.py +++ b/tests/tap_tester/test_mixpanel_start_date.py @@ -174,3 +174,9 @@ def test_run(self): # Start date test for standard server self.eu_residency = False self.start_date_test_run() + + def test_run_ssa(self): + # perform checks with ssa auth + self.service_account_authentication = True + self.start_date_test_run() + diff --git a/tests/unittests/test_client.py b/tests/unittests/test_client.py index 5f85eee..bf44292 100644 --- a/tests/unittests/test_client.py +++ b/tests/unittests/test_client.py @@ -36,6 +36,9 @@ def test_request_with_url(self, mock_perform_request, mock_check_access): mock_perform_request.return_value = MockResponse() mock_client = client.MixpanelClient( api_secret="mock_api_secret", + 
service_account_username="mock_service_account_username", + service_account_secret="service_account_secret", + project_id="project_id", api_domain="mock_api_domain", request_timeout=300, user_agent="USER_AGENT" @@ -61,6 +64,9 @@ def test_request_without_url(self, mock_perform_request, mock_check_access): mock_perform_request.return_value = MockResponse() mock_client = client.MixpanelClient( api_secret="mock_api_secret", + service_account_username="mock_service_account_username", + service_account_secret="service_account_secret", + project_id="project_id", api_domain="mock_api_domain", request_timeout=300, user_agent="USER_AGENT" @@ -84,6 +90,9 @@ def test_request_export_with_url(self, mock_perform_request, mock_check_access): mock_perform_request.return_value = MockResponse() mock_client = client.MixpanelClient( api_secret="mock_api_secret", + service_account_username="mock_service_account_username", + service_account_secret="service_account_secret", + project_id="project_id", api_domain="mock_api_domain", request_timeout=300, user_agent="USER_AGENT" @@ -119,6 +128,9 @@ def test_request_export_without_url(self, mock_perform_request, mock_check_acces mock_perform_request.return_value = MockResponse() mock_client = client.MixpanelClient( api_secret="mock_api_secret", + service_account_username="mock_service_account_username", + service_account_secret="service_account_secret", + project_id="project_id", api_domain="mock_api_domain", request_timeout=300, user_agent="USER_AGENT" diff --git a/tests/unittests/test_error_handling.py b/tests/unittests/test_error_handling.py index 0b973c8..1e7aecb 100644 --- a/tests/unittests/test_error_handling.py +++ b/tests/unittests/test_error_handling.py @@ -124,6 +124,9 @@ def test_perform_request_exception_handling( mock_request.return_value = mock_response mock_client = client.MixpanelClient( api_secret="mock_api_secret", + service_account_username="mock_service_account_username", + service_account_secret="service_account_secret", + 
project_id="project_id", api_domain="mock_api_domain", request_timeout=REQUEST_TIMEOUT, ) @@ -142,7 +145,6 @@ def test_perform_request_exception_handling( ["400 different timezone error", 400, mock_400_different_timezone(), client.MixpanelBadRequestError, "A validation exception has occurred. Please validate the timezone with the MixPanel UI under project settings."], ["400 timeout error", 400, MockResponse(400, text=timeout_400_error), client.MixpanelBadRequestError, "Timeout Error.(Please verify your credentials.)"], ["401 error", 401, MockResponse(401), client.MixpanelUnauthorizedError, "Invalid authorization credentials."], - ["403 error", 403, MockResponse(403), client.MixpanelForbiddenError, "User does not have permission to access the resource."], ["404 error", 404, MockResponse(404), client.MixpanelNotFoundError, "The resource you have specified cannot be found."], ["404 error", 404, mock_send_error(), client.MixpanelNotFoundError, "Resource not found error message from API response field 'error'."], ["404 error", 404, mock_send_message(), client.MixpanelNotFoundError, "Resource not found error message from API response field 'message'."], @@ -160,8 +162,12 @@ def test_check_access_exception_handling( mock_request.return_value = mock_response mock_client = client.MixpanelClient( api_secret="mock_api_secret", + service_account_username="mock_service_account_username", + service_account_secret="service_account_secret", + project_id="project_id", api_domain="mock_api_domain", request_timeout=REQUEST_TIMEOUT, + auth_type="saa" ) with self.assertRaises(error) as e: mock_client.check_access() @@ -189,8 +195,12 @@ def test_request_with_handling_for_5xx_exception_handling( mock_request.return_value = mock_response mock_client = client.MixpanelClient( api_secret="mock_api_secret", + service_account_username="mock_service_account_username", + service_account_secret="service_account_secret", + project_id="project_id", api_domain="mock_api_domain", 
request_timeout=REQUEST_TIMEOUT, + auth_type="saa" ) with self.assertRaises(error): mock_client.perform_request("GET") @@ -203,8 +213,12 @@ def test_check_access_handle_timeout_error(self, mock_request, mock_sleep): """ mock_client = client.MixpanelClient( api_secret="mock_api_secret", + service_account_username="mock_service_account_username", + service_account_secret="service_account_secret", + project_id="project_id", api_domain="mock_api_domain", request_timeout=REQUEST_TIMEOUT, + auth_type="saa" ) with self.assertRaises(client.ReadTimeoutError): mock_client.check_access() @@ -224,6 +238,9 @@ def test_check_access_402_exception_handling( mock_request.return_value = MockResponse(402) mock_client = client.MixpanelClient( api_secret="mock_api_secret", + service_account_username="mock_service_account_username", + service_account_secret="service_account_secret", + project_id="project_id", api_domain="mock_api_domain", request_timeout=REQUEST_TIMEOUT, ) @@ -245,7 +262,12 @@ def test_check_access_handle_timeout_error(self, mock_request, mock_time): """ Check whether the request backoffs properly for `check_access` method for 5 times in case of Timeout error. 
""" - mock_client = client.MixpanelClient(api_secret="mock_api_secret", api_domain="mock_api_domain", request_timeout=REQUEST_TIMEOUT) + mock_client = client.MixpanelClient(api_secret="mock_api_secret", + service_account_username="mock_service_account_username", + service_account_secret="service_account_secret", + project_id="project_id", + api_domain="mock_api_domain", + request_timeout=REQUEST_TIMEOUT) with self.assertRaises(requests.models.ProtocolError): mock_client.check_access() diff --git a/tests/unittests/test_request_timeout_param_value.py b/tests/unittests/test_request_timeout_param_value.py index 0c7e802..a9f6a47 100644 --- a/tests/unittests/test_request_timeout_param_value.py +++ b/tests/unittests/test_request_timeout_param_value.py @@ -1,6 +1,5 @@ import unittest from unittest import mock - from parameterized import parameterized from tap_mixpanel.__init__ import main @@ -76,6 +75,7 @@ def test_request_timeout_for_none_param_value( "https://mixpanel.com/api/2.0/engage", allow_redirects=True, headers=HEADER, + params={}, timeout=REQUEST_TIMEOUT_DEFAULT, ) @@ -106,5 +106,6 @@ def test_request_timeout( "https://mixpanel.com/api/2.0/engage", allow_redirects=True, headers=HEADER, + params={}, timeout=expected_value, ) diff --git a/tests/unittests/test_service_account_authentication.py b/tests/unittests/test_service_account_authentication.py new file mode 100644 index 0000000..c75189d --- /dev/null +++ b/tests/unittests/test_service_account_authentication.py @@ -0,0 +1,61 @@ +import unittest +from unittest import mock +from tests.unittests.test_error_handling import MockResponse +from tap_mixpanel.client import MixpanelClient, MixpanelForbiddenError + +class TestServiceAccountAuthentication(unittest.TestCase): + """ + Test that tap do authentication with service account credentials without any error if it is provided. 
+ """ + + @mock.patch("tap_mixpanel.client.MixpanelClient.check_access") + def test_token_creds(self, mock_check_access): + """Test authentication with token credentials(api_secret). + + Args: + mock_check_access: Mock the check_access method to test authentication. + """ + with MixpanelClient("api_secret", None, None, None,"api_domain", 300) as client_: + pass + + self.assertEqual(client_.auth_header, "Basic YXBpX3NlY3JldA==") + + @mock.patch("tap_mixpanel.client.MixpanelClient.check_access") + def test_service_account_creds(self, mock_check_access): + """Test authentication with service account credentials(username, secret). + + Args: + mock_check_access: Mock the check_access method to test authentication. + """ + with MixpanelClient(None, "service_account_username", "service_account_secret", "project_id","api_domain", 300, auth_type="saa") as client_: + pass + + self.assertEqual(client_.auth_header, "Basic c2VydmljZV9hY2NvdW50X3VzZXJuYW1lOnNlcnZpY2VfYWNjb3VudF9zZWNyZXQ=") + + @mock.patch("tap_mixpanel.client.MixpanelClient.check_access") + def test_no_creds(self, mock_check_access): + """Test that tap throws an error if credentials is not provided. + + Args: + mock_check_access: Mock the check_access method to test authentication. + """ + with self.assertRaises(Exception) as e: + with MixpanelClient(None, None, None, None,"api_domain", 300) as client_: + pass + + self.assertEqual(str(e.exception), "Error: Missing api_secret or service account username/secret in tap config.json") + + @mock.patch("requests.Session.request", return_value = MockResponse(403)) + @mock.patch("tap_mixpanel.client.LOGGER.error") + def test_check_access_403_error_for_service_account_creds(self, mock_logger, mock_request): + """Test that tap handles 403 error with proper message. + + Args: + mock_logger: Mock of LOGGER to verify the logger message + mock_request: Mock Session.request to explicitly raise the forbidden(403) error. 
+ """ + with self.assertRaises(MixpanelForbiddenError): + with MixpanelClient(None, "service_account_username", "service_account_secret", "project_id","api_domain", 300, auth_type="saa") as client_: + client_.check_access() + + mock_logger.assert_called_with('HTTP-error-code: 403, Error: User is not a member of this project: %s or this project is invalid', 'project_id') diff --git a/tests/unittests/test_support_eu_endpoints.py b/tests/unittests/test_support_eu_endpoints.py index 2825c25..c09e7e8 100644 --- a/tests/unittests/test_support_eu_endpoints.py +++ b/tests/unittests/test_support_eu_endpoints.py @@ -98,7 +98,7 @@ def test_support_eu_endpoints_except_export( state = {} catalog = MockCatalog("revenue") - client = MixpanelClient("", "", "") + client = MixpanelClient("", "", "", "", "", "") revenue_obj = Revenue(client) revenue_obj.sync( catalog=catalog, @@ -153,7 +153,7 @@ def test_support_export_eu_endpoint( state = {} catalog = MockCatalog("export") - client = MixpanelClient("", "", "") + client = MixpanelClient("", "", "", "", "", "") export_obj = Export(client) export_obj.sync( catalog=catalog, @@ -221,6 +221,7 @@ def test_support_eu_endpoint_in_discover( "https://eu.mixpanel.com/api/2.0/engage", allow_redirects=True, headers=header, + params={}, timeout=300, ) @@ -235,5 +236,6 @@ def test_support_eu_endpoint_in_discover( "https://mixpanel.com/api/2.0/engage", allow_redirects=True, headers=header, + params={}, timeout=300, )