facebookresearch · ruggsea · Nov 3, 2024 · Nov 3, 2024 · Nov 3, 2024 · Nov 3, 2024
diff --git a/README.md b/README.md
@@ -1,25 +1,83 @@
 # Ads-Library-API-Script-Repository
-Ads-Library-API-Script-Repository is a set of code examples to help user/researchers understand how the Facebook Ads Library API works. It also provides a simple command-line interface(CLI) for users to easily use the Facebook Ads Library API.
+Ads-Library-API-Script-Repository is a set of code examples to help users/researchers understand how the Facebook Ads Library API works. It also provides a simple command-line interface (CLI) for users to easily use the Facebook Ads Library API.
 
 ## Examples
-Here's an example on how to use the CLI:
+Here's an example of how to use the CLI:
 
-    $ python fb_ads_library_api_cli.py -t {access_token} -f 'page_id,ad_snapshot_url,funding_entity,ad_delivery_start_time' -c 'CA' -s '.' -v count
+    $ python fb_ads_library_api_cli.py -t {access_token} -f 'page_id,ad_snapshot_url,funding_entity,ad_delivery_start_time' -c 'CA' -s 'election' -v save_to_csv output.csv
 
-It would count the number of all polictical ads in CA(Canada);
+This command will save the details of all political ads in Canada (CA) containing the term 'election' to a CSV file named `output.csv`.
 
-Note: please replace the '{access_token}' with your [Facebook Developer access token](https://developers.facebook.com/tools/accesstoken/).
+Note: Please replace `{access_token}` with your [Facebook Developer access token](https://developers.facebook.com/tools/accesstoken/).
 
 ## Requirements
-Ads-Library-API-Script-Repository requires or works with
-* Mac OS X or Linux or Window
+Ads-Library-API-Script-Repository requires or works with:
+* Mac OS X, Linux, or Windows
 * Python 3.0+
 * Python Requests Library ([installation](https://docs.python-requests.org/en/master/user/install/#install))
 * Python iso3166 Library ([installation](https://pypi.org/project/iso3166/))
 
+## Features
+The script provides the following features:
+* Query the [Facebook Ads Library API](https://www.facebook.com/ads/library/api) to get all the Ads Library information on the Facebook platform.
+* Filter ads by various parameters such as country, ad type, delivery date, audience size, and more.
+* Save the results to a CSV file or perform other actions as defined by the CLI.
 
-## How Ads-Library-API-Script-Repository works
-The script will query the [Facebook Ads library API](https://www.facebook.com/ads/library/api) to get all the Ads Library information on the Facebook platform;
+## Supported Parameters
+The CLI supports the following parameters:
+
+* `-t, --access-token`: The Facebook developer access token (required).
+* `-f, --fields`: Fields to retrieve from the Ad Library API (required).
+* `-s, --search-term`: The term you want to search for.
+* `-c, --country`: Comma-separated country codes (no spaces) (required).
+* `--search-page-ids`: The ID(s) of the specific Facebook Page(s) you want to search.
+* `--ad-active-status`: Filter by the current status of the ads at the moment the script runs.
+* `--before-date`: Search for ads delivered before this date (inclusive).
+* `--after-date`: Search for ads delivered after this date (inclusive).
+* `--ad-type`: Search by type of ad (choices: `ALL`, `EMPLOYMENT_ADS`, `FINANCIAL_PRODUCTS_AND_SERVICES_ADS`, `HOUSING_ADS`, `POLITICAL_AND_ISSUE_ADS`).
+* `--bylines`: Filter results for ads with a "Paid for by" disclaimer byline.
+* `--delivery-by-region`: View ads by the region where Accounts Center accounts were based or located.
+* `--estimated-audience-size-max`: Search for ads with a maximum estimated audience size.
+* `--estimated-audience-size-min`: Search for ads with a minimum estimated audience size.
+* `--languages`: Search for ads based on the language(s) contained in the ad.
+* `--media-type`: Search for ads based on whether they contain a specific type of media (choices: `ALL`, `IMAGE`, `MEME`, `VIDEO`, `NONE`).
+* `--publisher-platforms`: Search for ads based on whether they appear on a particular Meta technology.
+* `--search-type`: The type of search to use for the search_terms field (choices: `KEYWORD_UNORDERED`, `KEYWORD_EXACT_PHRASE`, default: `KEYWORD_UNORDERED`).
+* `--unmask-removed-content`: Specify whether you would like your results to reveal content that was removed for violating our standards.
+* `--batch-size`: Number of ads to request per API page (sent as the API's `limit` parameter).
+* `--retry-limit`: When an error occurs, the script will abort if it fails to get the same batch this many times (default: 3).
+* `-v, --verbose`: Enable verbose output.
+
+## Supported Fields
+The following fields can be queried from the Ad Library API:
+
+* `id`
+* `ad_creation_time`
+* `ad_creative_bodies`
+* `ad_creative_link_captions`
+* `ad_creative_link_descriptions`
+* `ad_creative_link_titles`
+* `ad_delivery_start_time`
+* `ad_delivery_stop_time`
+* `ad_snapshot_url`
+* `age_country_gender_reach_breakdown`
+* `beneficiary_payers`
+* `br_total_reach`
+* `bylines`
+* `currency`
+* `delivery_by_region`
+* `demographic_distribution`
+* `estimated_audience_size`
+* `eu_total_reach`
+* `impressions`
+* `languages`
+* `page_id`
+* `page_name`
+* `publisher_platforms`
+* `spend`
+* `target_ages`
+* `target_gender`
+* `target_locations`
 
 ## Full documentation
 You can find the full documentation here: (--to-be-added--)

diff --git a/python/fb_ads_library_api.py b/python/fb_ads_library_api.py
@@ -23,10 +23,9 @@ def get_ad_archive_id(data):
 class FbAdsLibraryTraversal:
     default_url_pattern = (
         "https://graph.facebook.com/{}/ads_archive?access_token={}&"
-        + "fields={}&search_terms={}&ad_reached_countries={}&search_page_ids={}&"
-        + "ad_active_status={}&limit={}"
+        + "fields={}&search_terms={}&ad_reached_countries={}"
     )
-    default_api_version = "v14.0"
+    default_api_version = "v21.0"
 
     def __init__(
         self,
@@ -36,19 +35,43 @@ def __init__(
         country,
         search_page_ids="",
         ad_active_status="ALL",
-        after_date="1970-01-01",
+        before_date=None,
+        after_date="2000-01-01",
+        ad_type="ALL",
+        bylines=None,
+        delivery_by_region=None,
+        estimated_audience_size_max=None,
+        estimated_audience_size_min=None,
+        languages=None,
+        media_type="ALL",
+        publisher_platforms=None,
+        search_type="KEYWORD_UNORDERED",
+        unmask_removed_content=False,
         page_limit=500,
         api_version=None,
         retry_limit=3,
     ):
         self.page_count = 0
         self.access_token = access_token
         self.fields = fields
+        if "ad_delivery_start_time" not in self.fields:
+            self.fields += ",ad_delivery_start_time"
         self.search_term = search_term
         self.country = country
-        self.after_date = after_date
         self.search_page_ids = search_page_ids
         self.ad_active_status = ad_active_status
+        self.before_date = before_date
+        self.after_date = after_date
+        self.ad_type = ad_type
+        self.bylines = bylines
+        self.delivery_by_region = delivery_by_region
+        self.estimated_audience_size_max = estimated_audience_size_max
+        self.estimated_audience_size_min = estimated_audience_size_min
+        self.languages = languages
+        self.media_type = media_type
+        self.publisher_platforms = publisher_platforms
+        self.search_type = search_type
+        self.unmask_removed_content = unmask_removed_content
         self.page_limit = page_limit
         self.retry_limit = retry_limit
         if api_version is None:
@@ -57,31 +80,53 @@ def __init__(
             self.api_version = api_version
 
     def generate_ad_archives(self):
-        next_page_url = self.default_url_pattern.format(
+        base_url = self.default_url_pattern.format(
             self.api_version,
             self.access_token,
             self.fields,
             self.search_term,
             self.country,
-            self.search_page_ids,
-            self.ad_active_status,
-            self.page_limit,
         )
+        optional_params = {
+            "search_page_ids": self.search_page_ids,
+            "ad_active_status": self.ad_active_status,
+            "ad_delivery_date_max": self.before_date,
+            "ad_delivery_date_min": self.after_date if self.after_date != "2000-01-01" else None,
+            "ad_type": self.ad_type,
+            "bylines": self.bylines,
+            "delivery_by_region": self.delivery_by_region,
+            "estimated_audience_size_max": self.estimated_audience_size_max,
+            "estimated_audience_size_min": self.estimated_audience_size_min,
+            "languages": self.languages,
+            "media_type": self.media_type,
+            "publisher_platforms": self.publisher_platforms,
+            "search_type": self.search_type,
+            "unmask_removed_content": self.unmask_removed_content,
+            "limit": self.page_limit,
+        }
+        optional_params_str = "&".join(
+            f"{key}={value}" for key, value in optional_params.items() if value
+        )
+        next_page_url = f"{base_url}&{optional_params_str}"
         return self.__class__._get_ad_archives_from_url(
             next_page_url, after_date=self.after_date, retry_limit=self.retry_limit
         )
 
     @staticmethod
     def _get_ad_archives_from_url(
-        next_page_url, after_date="1970-01-01", retry_limit=3
+        next_page_url, after_date="2000-01-01", retry_limit=3
     ):
         last_error_url = None
         last_retry_count = 0
-        start_time_cutoff_after = datetime.strptime(after_date, "%Y-%m-%d").timestamp()
-
+        print("after_date: ", after_date)
+        print("next_page_url: ", next_page_url)
+        start_time_cutoff_after = datetime.strptime(after_date, "%Y-%m-%d")
+        print("start_time_cutoff_after: ", start_time_cutoff_after)
+        start_time_cutoff_after = start_time_cutoff_after.timestamp()
         while next_page_url is not None:
             response = requests.get(next_page_url)
             response_data = json.loads(response.text)
+            print("response_data: ", response_data)
             if "error" in response_data:
                 if next_page_url == last_error_url:
                     # failed again
@@ -121,7 +166,7 @@ def _get_ad_archives_from_url(
                 next_page_url = None
 
     @classmethod
-    def generate_ad_archives_from_url(cls, failure_url, after_date="1970-01-01"):
+    def generate_ad_archives_from_url(cls, failure_url, after_date="2000-01-01"):
         """
         if we failed from error, later we can just continue from the last failure url
         """

diff --git a/python/fb_ads_library_api_cli.py b/python/fb_ads_library_api_cli.py
@@ -46,12 +46,55 @@ def get_parser():
         help="Filter by the current status of the ads at the moment the script runs",
     )
     parser.add_argument(
-        "--after-date", help="Only return ads that started delivery after this date"
+        "--before-date", help="Search for ads delivered before this date (inclusive)"
+    )
+    parser.add_argument(
+        "--after-date", help="Search for ads delivered after this date (inclusive)"
+    )
+    parser.add_argument(
+        "--ad-type",
+        help="Search by type of ad",
+        choices=["ALL", "EMPLOYMENT_ADS", "FINANCIAL_PRODUCTS_AND_SERVICES_ADS", "HOUSING_ADS", "POLITICAL_AND_ISSUE_ADS"],
+    )
+    parser.add_argument(
+        "--bylines", help="Filter results for ads with a paid for by disclaimer byline"
+    )
+    parser.add_argument(
+        "--delivery-by-region", help="View ads by the region where Accounts Center accounts were based or located"
+    )
+    parser.add_argument(
+        "--estimated-audience-size-max", type=int, help="Search for ads with a maximum estimated audience size"
+    )
+    parser.add_argument(
+        "--estimated-audience-size-min", type=int, help="Search for ads with a minimum estimated audience size"
+    )
+    parser.add_argument(
+        "--languages", help="Search for ads based on the language(s) contained in the ad"
+    )
+    parser.add_argument(
+        "--media-type",
+        help="Search for ads based on whether they contain a specific type of media",
+        choices=["ALL", "IMAGE", "MEME", "VIDEO", "NONE"],
+    )
+    parser.add_argument(
+        "--publisher-platforms", help="Search for ads based on whether they appear on a particular Meta technology"
+    )
+    parser.add_argument(
+        "--search-type",
+        help="The type of search to use for the search_terms field",
+        choices=["KEYWORD_UNORDERED", "KEYWORD_EXACT_PHRASE"],
+        default="KEYWORD_UNORDERED",
+    )
+    parser.add_argument(
+        "--unmask-removed-content",
+        action="store_true",
+        help="Specify whether you would like your results to reveal content that was removed for violating our standards",
     )
     parser.add_argument("--batch-size", type=int, help="Batch size")
     parser.add_argument(
         "--retry-limit",
         type=int,
+        default=3,
         help="When an error occurs, the script will abort if it fails to get the same batch this amount of times",
     )
     parser.add_argument("-v", "--verbose", action="store_true")
@@ -115,19 +158,27 @@ def main():
         search_term = "."
     else:
         search_term = opts.search_term
+
     api = FbAdsLibraryTraversal(
-        opts.access_token, opts.fields, search_term, opts.country
-    )
-    if opts.search_page_ids:
-        api.search_page_ids = opts.search_page_ids
-    if opts.ad_active_status:
-        api.ad_active_status = opts.ad_active_status
-    if opts.batch_size:
-        api.page_limit = opts.batch_size
-    if opts.retry_limit:
-        api.retry_limit = opts.retry_limit
-    if opts.after_date:
-        api.after_date = opts.after_date
+        opts.access_token, opts.fields, search_term, opts.country,
+        search_page_ids=opts.search_page_ids if opts.search_page_ids else "",
+        ad_active_status=opts.ad_active_status if opts.ad_active_status else "",
+        before_date=opts.before_date,
+        after_date=opts.after_date if opts.after_date else "2000-01-01",
+        ad_type=opts.ad_type,
+        bylines=opts.bylines,
+        delivery_by_region=opts.delivery_by_region,
+        estimated_audience_size_max=opts.estimated_audience_size_max,
+        estimated_audience_size_min=opts.estimated_audience_size_min,
+        languages=opts.languages,
+        media_type=opts.media_type,
+        publisher_platforms=opts.publisher_platforms,
+        search_type=opts.search_type,
+        unmask_removed_content=opts.unmask_removed_content,
+        page_limit=opts.batch_size,
+        retry_limit=opts.retry_limit,
+    )
+
     generator_ad_archives = api.generate_ad_archives()
     if opts.action in get_operators():
         if opts.action == "save_to_csv":

diff --git a/python/fb_ads_library_api_utils.py b/python/fb_ads_library_api_utils.py
@@ -44,31 +44,33 @@
     "US",
 ]
 valid_query_fields = [
+    "id",
     "ad_creation_time",
-    "ad_creative_body",
     "ad_creative_bodies",
-    "ad_creative_link_caption",
     "ad_creative_link_captions",
-    "ad_creative_link_description",
     "ad_creative_link_descriptions",
-    "ad_creative_link_title",
     "ad_creative_link_titles",
     "ad_delivery_start_time",
     "ad_delivery_stop_time",
     "ad_snapshot_url",
+    "age_country_gender_reach_breakdown",
+    "beneficiary_payers",
+    "br_total_reach",
+    "bylines",
     "currency",
     "delivery_by_region",
     "demographic_distribution",
-    "bylines",
-    "id",
+    "estimated_audience_size",
+    "eu_total_reach",
     "impressions",
     "languages",
     "page_id",
     "page_name",
-    "potential_reach",
     "publisher_platforms",
-    "region_distribution",
     "spend",
+    "target_ages",
+    "target_gender",
+    "target_locations",
 ]