diff --git a/README.md b/README.md index 4612ff4..7d05c1a 100644 --- a/README.md +++ b/README.md @@ -1,25 +1,83 @@ # Ads-Library-API-Script-Repository -Ads-Library-API-Script-Repository is a set of code examples to help user/researchers understand how the Facebook Ads Library API works. It also provides a simple command-line interface(CLI) for users to easily use the Facebook Ads Library API. +Ads-Library-API-Script-Repository is a set of code examples to help users/researchers understand how the Facebook Ads Library API works. It also provides a simple command-line interface (CLI) for users to easily use the Facebook Ads Library API. ## Examples -Here's an example on how to use the CLI: +Here's an example of how to use the CLI: - $ python fb_ads_library_api_cli.py -t {access_token} -f 'page_id,ad_snapshot_url,funding_entity,ad_delivery_start_time' -c 'CA' -s '.' -v count + $ python fb_ads_library_api_cli.py -t {access_token} -f 'page_id,ad_snapshot_url,funding_entity,ad_delivery_start_time' -c 'CA' -s 'election' -v save_to_csv output.csv -It would count the number of all polictical ads in CA(Canada); +This command will save the details of all political ads in Canada (CA) containing the term 'election' to a CSV file named `output.csv`. -Note: please replace the '{access_token}' with your [Facebook Developer access token](https://developers.facebook.com/tools/accesstoken/). +Note: please replace the `{access_token}` with your [Facebook Developer access token](https://developers.facebook.com/tools/accesstoken/). 
## Requirements -Ads-Library-API-Script-Repository requires or works with -* Mac OS X or Linux or Window +Ads-Library-API-Script-Repository requires or works with: +* Mac OS X, Linux, or Windows * Python 3.0+ * Python Requests Library ([installation](https://docs.python-requests.org/en/master/user/install/#install)) * Python iso3166 Library ([installation](https://pypi.org/project/iso3166/)) +## Features +The script provides the following features: +* Query the [Facebook Ads Library API](https://www.facebook.com/ads/library/api) to get all the Ads Library information on the Facebook platform. +* Filter ads by various parameters such as country, ad type, delivery date, audience size, and more. +* Save the results to a CSV file or perform other actions as defined by the CLI. -## How Ads-Library-API-Script-Repository works -The script will query the [Facebook Ads library API](https://www.facebook.com/ads/library/api) to get all the Ads Library information on the Facebook platform; +## Supported Parameters +The CLI supports the following parameters: + +* `-t, --access-token`: The Facebook developer access token (required). +* `-f, --fields`: Fields to retrieve from the Ad Library API (required). +* `-s, --search-term`: The term you want to search for. +* `-c, --country`: Comma-separated list of country codes, with no spaces (required). +* `--search-page-ids`: The ID(s) of the specific Facebook Page(s) you want to search. +* `--ad-active-status`: Filter by the current status of the ads at the moment the script runs. +* `--before-date`: Search for ads delivered before this date (inclusive). +* `--after-date`: Search for ads delivered after this date (inclusive; format `YYYY-MM-DD`). +* `--ad-type`: Search by type of ad (choices: `ALL`, `EMPLOYMENT_ADS`, `FINANCIAL_PRODUCTS_AND_SERVICES_ADS`, `HOUSING_ADS`, `POLITICAL_AND_ISSUE_ADS`). +* `--bylines`: Filter results for ads with a "Paid for by" disclaimer byline. +* `--delivery-by-region`: View ads by the region where Accounts Center accounts were based or located. 
+* `--estimated-audience-size-max`: Search for ads with a maximum estimated audience size. +* `--estimated-audience-size-min`: Search for ads with a minimum estimated audience size. +* `--languages`: Search for ads based on the language(s) contained in the ad. +* `--media-type`: Search for ads based on whether they contain a specific type of media (choices: `ALL`, `IMAGE`, `MEME`, `VIDEO`, `NONE`). +* `--publisher-platforms`: Search for ads based on whether they appear on a particular Meta technology. +* `--search-type`: The type of search to use for the `search_terms` field (choices: `KEYWORD_UNORDERED`, `KEYWORD_EXACT_PHRASE`, default: `KEYWORD_UNORDERED`). +* `--unmask-removed-content`: Specify whether you would like your results to reveal content that was removed for violating our standards. +* `--batch-size`: Number of ads to request per API call (sent as the `limit` query parameter). +* `--retry-limit`: When an error occurs, the script will abort if it fails to get the same batch this number of times (default: 3). +* `-v, --verbose`: Enable verbose output. 
+ +## Supported Fields +The following fields can be queried from the Ad Library API: + +* `id` +* `ad_creation_time` +* `ad_creative_bodies` +* `ad_creative_link_captions` +* `ad_creative_link_descriptions` +* `ad_creative_link_titles` +* `ad_delivery_start_time` +* `ad_delivery_stop_time` +* `ad_snapshot_url` +* `age_country_gender_reach_breakdown` +* `beneficiary_payers` +* `br_total_reach` +* `bylines` +* `currency` +* `delivery_by_region` +* `demographic_distribution` +* `estimated_audience_size` +* `eu_total_reach` +* `impressions` +* `languages` +* `page_id` +* `page_name` +* `publisher_platforms` +* `spend` +* `target_ages` +* `target_gender` +* `target_locations` ## Full documentation You can find the full documentation here: (--to-be-added--) diff --git a/python/fb_ads_library_api.py b/python/fb_ads_library_api.py index e673481..36172a2 100755 --- a/python/fb_ads_library_api.py +++ b/python/fb_ads_library_api.py @@ -23,10 +23,9 @@ def get_ad_archive_id(data): class FbAdsLibraryTraversal: default_url_pattern = ( "https://graph.facebook.com/{}/ads_archive?access_token={}&" - + "fields={}&search_terms={}&ad_reached_countries={}&search_page_ids={}&" - + "ad_active_status={}&limit={}" + + "fields={}&search_terms={}&ad_reached_countries={}" ) - default_api_version = "v14.0" + default_api_version = "v21.0" def __init__( self, @@ -36,7 +35,18 @@ def __init__( country, search_page_ids="", ad_active_status="ALL", - after_date="1970-01-01", + before_date=None, + after_date="2000-01-01", + ad_type="ALL", + bylines=None, + delivery_by_region=None, + estimated_audience_size_max=None, + estimated_audience_size_min=None, + languages=None, + media_type="ALL", + publisher_platforms=None, + search_type="KEYWORD_UNORDERED", + unmask_removed_content=False, page_limit=500, api_version=None, retry_limit=3, @@ -44,11 +54,24 @@ def __init__( self.page_count = 0 self.access_token = access_token self.fields = fields + if "ad_delivery_start_time" not in self.fields: + self.fields += 
",ad_delivery_start_time" self.search_term = search_term self.country = country - self.after_date = after_date self.search_page_ids = search_page_ids self.ad_active_status = ad_active_status + self.before_date = before_date + self.after_date = after_date + self.ad_type = ad_type + self.bylines = bylines + self.delivery_by_region = delivery_by_region + self.estimated_audience_size_max = estimated_audience_size_max + self.estimated_audience_size_min = estimated_audience_size_min + self.languages = languages + self.media_type = media_type + self.publisher_platforms = publisher_platforms + self.search_type = search_type + self.unmask_removed_content = unmask_removed_content self.page_limit = page_limit self.retry_limit = retry_limit if api_version is None: @@ -57,31 +80,53 @@ def __init__( self.api_version = api_version def generate_ad_archives(self): - next_page_url = self.default_url_pattern.format( + base_url = self.default_url_pattern.format( self.api_version, self.access_token, self.fields, self.search_term, self.country, - self.search_page_ids, - self.ad_active_status, - self.page_limit, ) + optional_params = { + "search_page_ids": self.search_page_ids, + "ad_active_status": self.ad_active_status, + "ad_delivery_date_max": self.before_date, + "ad_delivery_date_min": self.after_date if self.after_date != "2000-01-01" else None, + "ad_type": self.ad_type, + "bylines": self.bylines, + "delivery_by_region": self.delivery_by_region, + "estimated_audience_size_max": self.estimated_audience_size_max, + "estimated_audience_size_min": self.estimated_audience_size_min, + "languages": self.languages, + "media_type": self.media_type, + "publisher_platforms": self.publisher_platforms, + "search_type": self.search_type, + "unmask_removed_content": self.unmask_removed_content, + "limit": self.page_limit, + } + optional_params_str = "&".join( + f"{key}={value}" for key, value in optional_params.items() if value + ) + next_page_url = f"{base_url}&{optional_params_str}" return 
self.__class__._get_ad_archives_from_url( next_page_url, after_date=self.after_date, retry_limit=self.retry_limit ) @staticmethod def _get_ad_archives_from_url( - next_page_url, after_date="1970-01-01", retry_limit=3 + next_page_url, after_date="2000-01-01", retry_limit=3 ): last_error_url = None last_retry_count = 0 - start_time_cutoff_after = datetime.strptime(after_date, "%Y-%m-%d").timestamp() - + print("after_date: ", after_date) + print("next_page_url: ", next_page_url) + start_time_cutoff_after = datetime.strptime(after_date, "%Y-%m-%d") + print("start_time_cutoff_after: ", start_time_cutoff_after) + start_time_cutoff_after = start_time_cutoff_after.timestamp() while next_page_url is not None: response = requests.get(next_page_url) response_data = json.loads(response.text) + print("response_data: ", response_data) if "error" in response_data: if next_page_url == last_error_url: # failed again @@ -121,7 +166,7 @@ def _get_ad_archives_from_url( next_page_url = None @classmethod - def generate_ad_archives_from_url(cls, failure_url, after_date="1970-01-01"): + def generate_ad_archives_from_url(cls, failure_url, after_date="2000-01-01"): """ if we failed from error, later we can just continue from the last failure url """ diff --git a/python/fb_ads_library_api_cli.py b/python/fb_ads_library_api_cli.py index c54a082..7941cab 100644 --- a/python/fb_ads_library_api_cli.py +++ b/python/fb_ads_library_api_cli.py @@ -46,12 +46,55 @@ def get_parser(): help="Filter by the current status of the ads at the moment the script runs", ) parser.add_argument( - "--after-date", help="Only return ads that started delivery after this date" + "--before-date", help="Search for ads delivered before this date (inclusive)" + ) + parser.add_argument( + "--after-date", help="Search for ads delivered after this date (inclusive)" + ) + parser.add_argument( + "--ad-type", + help="Search by type of ad", + choices=["ALL", "EMPLOYMENT_ADS", "FINANCIAL_PRODUCTS_AND_SERVICES_ADS", "HOUSING_ADS", 
"POLITICAL_AND_ISSUE_ADS"], + ) + parser.add_argument( + "--bylines", help="Filter results for ads with a paid for by disclaimer byline" + ) + parser.add_argument( + "--delivery-by-region", help="View ads by the region where Accounts Center accounts were based or located" + ) + parser.add_argument( + "--estimated-audience-size-max", type=int, help="Search for ads with a maximum estimated audience size" + ) + parser.add_argument( + "--estimated-audience-size-min", type=int, help="Search for ads with a minimum estimated audience size" + ) + parser.add_argument( + "--languages", help="Search for ads based on the language(s) contained in the ad" + ) + parser.add_argument( + "--media-type", + help="Search for ads based on whether they contain a specific type of media", + choices=["ALL", "IMAGE", "MEME", "VIDEO", "NONE"], + ) + parser.add_argument( + "--publisher-platforms", help="Search for ads based on whether they appear on a particular Meta technology" + ) + parser.add_argument( + "--search-type", + help="The type of search to use for the search_terms field", + choices=["KEYWORD_UNORDERED", "KEYWORD_EXACT_PHRASE"], + default="KEYWORD_UNORDERED", + ) + parser.add_argument( + "--unmask-removed-content", + action="store_true", + help="Specify whether you would like your results to reveal content that was removed for violating our standards", ) parser.add_argument("--batch-size", type=int, help="Batch size") parser.add_argument( "--retry-limit", type=int, + default=3, help="When an error occurs, the script will abort if it fails to get the same batch this amount of times", ) parser.add_argument("-v", "--verbose", action="store_true") @@ -115,19 +158,27 @@ def main(): search_term = "." 
else: search_term = opts.search_term + api = FbAdsLibraryTraversal( - opts.access_token, opts.fields, search_term, opts.country - ) - if opts.search_page_ids: - api.search_page_ids = opts.search_page_ids - if opts.ad_active_status: - api.ad_active_status = opts.ad_active_status - if opts.batch_size: - api.page_limit = opts.batch_size - if opts.retry_limit: - api.retry_limit = opts.retry_limit - if opts.after_date: - api.after_date = opts.after_date + opts.access_token, opts.fields, search_term, opts.country, + search_page_ids=opts.search_page_ids if opts.search_page_ids else "", + ad_active_status=opts.ad_active_status if opts.ad_active_status else "", + before_date=opts.before_date, + after_date=opts.after_date if opts.after_date else "2000-01-01", + ad_type=opts.ad_type, + bylines=opts.bylines, + delivery_by_region=opts.delivery_by_region, + estimated_audience_size_max=opts.estimated_audience_size_max, + estimated_audience_size_min=opts.estimated_audience_size_min, + languages=opts.languages, + media_type=opts.media_type, + publisher_platforms=opts.publisher_platforms, + search_type=opts.search_type, + unmask_removed_content=opts.unmask_removed_content, + page_limit=opts.batch_size, + retry_limit=opts.retry_limit, + ) + generator_ad_archives = api.generate_ad_archives() if opts.action in get_operators(): if opts.action == "save_to_csv": diff --git a/python/fb_ads_library_api_utils.py b/python/fb_ads_library_api_utils.py index 6b74fe3..6bc017e 100644 --- a/python/fb_ads_library_api_utils.py +++ b/python/fb_ads_library_api_utils.py @@ -44,31 +44,33 @@ "US", ] valid_query_fields = [ + "id", "ad_creation_time", - "ad_creative_body", "ad_creative_bodies", - "ad_creative_link_caption", "ad_creative_link_captions", - "ad_creative_link_description", "ad_creative_link_descriptions", - "ad_creative_link_title", "ad_creative_link_titles", "ad_delivery_start_time", "ad_delivery_stop_time", "ad_snapshot_url", + "age_country_gender_reach_breakdown", + "beneficiary_payers", + 
"br_total_reach", + "bylines", "currency", "delivery_by_region", "demographic_distribution", - "bylines", - "id", + "estimated_audience_size", + "eu_total_reach", "impressions", "languages", "page_id", "page_name", - "potential_reach", "publisher_platforms", - "region_distribution", "spend", + "target_ages", + "target_gender", + "target_locations", ]