diff --git a/.github/changelog/2589-from-description b/.github/changelog/2589-from-description
new file mode 100644
index 0000000000..e519d292c6
--- /dev/null
+++ b/.github/changelog/2589-from-description
@@ -0,0 +1,4 @@
+Significance: minor
+Type: added
+
+Add domain blocklist importer for bulk importing blocked domains.
diff --git a/includes/class-moderation.php b/includes/class-moderation.php
index c17f8cd412..b28164b01d 100644
--- a/includes/class-moderation.php
+++ b/includes/class-moderation.php
@@ -223,6 +223,52 @@ public static function add_site_block( $type, $value ) {
 		return true; // Already blocked.
 	}
 
+	/**
+	 * Add multiple site-wide blocks at once.
+	 *
+	 * More efficient than calling add_site_block() in a loop as it
+	 * performs a single database update. Values that are already blocked
+	 * (and duplicates within $values) are skipped, so the
+	 * 'activitypub_add_site_block' action fires once per newly added value.
+	 *
+	 * @param string $type   The block type (domain or keyword only).
+	 * @param array  $values Array of values to block.
+	 */
+	public static function add_site_blocks( $type, $values ) {
+		if ( ! \in_array( $type, array( self::TYPE_DOMAIN, self::TYPE_KEYWORD ), true ) ) {
+			return;
+		}
+
+		if ( empty( $values ) ) {
+			return;
+		}
+
+		$existing = \get_option( self::OPTION_KEYS[ $type ], array() );
+		if ( ! \is_array( $existing ) ) {
+			$existing = array(); // Guard against a malformed stored option.
+		}
+
+		// Keep only values that are not blocked yet, so hooks match what is actually added.
+		$new_values = \array_diff( \array_unique( (array) $values ), $existing );
+		if ( empty( $new_values ) ) {
+			return; // Already blocked.
+		}
+
+		foreach ( $new_values as $value ) {
+			/**
+			 * Fired when a domain or keyword is blocked site-wide.
+			 *
+			 * @param string $value The blocked domain or keyword.
+			 * @param string $type  The block type (actor, domain, keyword).
+			 */
+			\do_action( 'activitypub_add_site_block', $value, $type );
+		}
+
+		// array_unique() preserves keys; array_values() stores a clean sequential list.
+		$blocks = \array_values( \array_unique( \array_merge( $existing, $new_values ) ) );
+		\update_option( self::OPTION_KEYS[ $type ], $blocks );
+	}
+
 	/**
 	 * Remove a site-wide block.
* diff --git a/includes/wp-admin/class-settings-fields.php b/includes/wp-admin/class-settings-fields.php index 24cd4adb56..1886e36bb0 100644 --- a/includes/wp-admin/class-settings-fields.php +++ b/includes/wp-admin/class-settings-fields.php @@ -59,7 +59,11 @@ public static function register_settings_fields() { 'activitypub_moderation', \esc_html__( 'Moderation', 'activitypub' ), array( self::class, 'render_moderation_section_description' ), - 'activitypub_settings' + 'activitypub_settings', + array( + 'before_section' => '
', + 'after_section' => '
', + ) ); // Add settings fields. diff --git a/includes/wp-admin/import/class-blocklist.php b/includes/wp-admin/import/class-blocklist.php new file mode 100644 index 0000000000..83c493846e --- /dev/null +++ b/includes/wp-admin/import/class-blocklist.php @@ -0,0 +1,385 @@ +'; + echo '

' . \esc_html__( 'Import Domain Blocklist', 'activitypub' ) . '

'; + } + + /** + * Display the importer footer. + */ + private static function footer() { + echo ''; + } + + /** + * Display the greeting/intro screen. + */ + private static function greet() { + echo '
'; + echo '

' . \esc_html__( 'Import a domain blocklist to block multiple ActivityPub instances at once. Supported formats:', 'activitypub' ) . '

'; + echo ''; + + // File upload option. + \printf( '

%s

', \esc_html__( 'Option 1: Upload a File', 'activitypub' ) ); + \wp_import_upload_form( 'admin.php?import=blocklist&step=1' ); + + // URL import option. + \printf( '

%s

', \esc_html__( 'Option 2: Import from URL', 'activitypub' ) ); + ?> +
+ +

+ +

+

+ +

+
+ +

+

+
+ + +

+ + + + +

+
+ + '; + } + + /** + * Handle file upload and import. + */ + private static function handle_upload() { + $error_message = \__( 'Sorry, there has been an error.', 'activitypub' ); + + // phpcs:ignore WordPress.Security.NonceVerification.Missing -- Nonce verified in dispatch(). + if ( ! isset( $_FILES['import']['name'] ) ) { + echo '

' . \esc_html( $error_message ) . '
'; + \printf( + /* translators: 1: php.ini, 2: post_max_size, 3: upload_max_filesize */ + \esc_html__( 'File is empty. Please upload something more substantial. This error could also be caused by uploads being disabled in your %1$s file or by %2$s being defined as smaller than %3$s in %1$s.', 'activitypub' ), + 'php.ini', + 'post_max_size', + 'upload_max_filesize' + ); + echo '

'; + return; + } + + // Allow CSV and TXT files. + $allowed_types = array( + 'csv' => 'text/csv', + 'txt' => 'text/plain', + ); + // phpcs:ignore WordPress.Security.NonceVerification.Missing -- Nonce verified in dispatch(). + $file_info = \wp_check_filetype( \sanitize_file_name( $_FILES['import']['name'] ), $allowed_types ); + + if ( ! $file_info['type'] ) { + \printf( + '

%s
%s

', + \esc_html( $error_message ), + \esc_html__( 'The uploaded file must be a CSV or TXT file. Please try again with the correct file format.', 'activitypub' ) + ); + return; + } + + // phpcs:ignore WordPress.Security.NonceVerification.Missing, WordPress.Security.ValidatedSanitizedInput -- Nonce verified in dispatch(), tmp_name is a server path. + $file_path = $_FILES['import']['tmp_name'] ?? ''; + + if ( empty( $file_path ) ) { + \printf( '

%s
%s

', \esc_html( $error_message ), \esc_html__( 'Upload failed. Please try again.', 'activitypub' ) ); + return; + } + + $domains = self::parse_csv( $file_path ); + + if ( empty( $domains ) ) { + \printf( '

%s
%s

', \esc_html( $error_message ), \esc_html__( 'No valid domains found in the file.', 'activitypub' ) ); + return; + } + + self::import( $domains ); + } + + /** + * Handle URL import. + */ + private static function handle_url_import() { + $error_message = \__( 'Sorry, there has been an error.', 'activitypub' ); + + // phpcs:ignore WordPress.Security.NonceVerification.Missing -- Nonce verified in dispatch(). + $url = \sanitize_url( \wp_unslash( $_POST['import_url'] ?? '' ) ); + + if ( empty( $url ) ) { + \printf( '

%s
%s

', \esc_html( $error_message ), \esc_html__( 'Please provide a valid URL.', 'activitypub' ) ); + return; + } + + if ( ! \filter_var( $url, FILTER_VALIDATE_URL ) ) { + \printf( '

%s
%s

', \esc_html( $error_message ), \esc_html__( 'The provided URL is not valid.', 'activitypub' ) ); + return; + } + + // Fetch the URL content. + $response = \wp_remote_get( + $url, + array( + 'timeout' => 30, + 'redirection' => 5, + ) + ); + + if ( \is_wp_error( $response ) ) { + \printf( '

%s
%s

', \esc_html( $error_message ), \esc_html( $response->get_error_message() ) ); + return; + } + + $response_code = \wp_remote_retrieve_response_code( $response ); + if ( 200 !== $response_code ) { + \printf( + '

%s
%s

', + \esc_html( $error_message ), + /* translators: %d: HTTP response code */ + \esc_html( \sprintf( \__( 'Failed to fetch URL. HTTP response code: %d', 'activitypub' ), $response_code ) ) + ); + return; + } + + $body = \wp_remote_retrieve_body( $response ); + if ( empty( $body ) ) { + \printf( '

%s
%s

', \esc_html( $error_message ), \esc_html__( 'The URL returned empty content.', 'activitypub' ) ); + return; + } + + $domains = self::parse_csv_string( $body ); + + if ( empty( $domains ) ) { + \printf( '

%s
%s

', \esc_html( $error_message ), \esc_html__( 'No valid domains found at the URL.', 'activitypub' ) ); + return; + } + + self::import( $domains ); + } + + /** + * Execute the import. + * + * @param array $domains Array of domains to import. + */ + private static function import( $domains ) { + \set_time_limit( 0 ); + + /** + * Fires when the blocklist import starts. + */ + \do_action( 'import_start' ); + + $existing = Moderation::get_site_blocks()[ Moderation::TYPE_DOMAIN ] ?? array(); + $new_domains = \array_diff( $domains, $existing ); + $imported = \count( $new_domains ); + $skipped = \count( $domains ) - $imported; + + Moderation::add_site_blocks( Moderation::TYPE_DOMAIN, $new_domains ); + + /** + * Fires when the blocklist import ends. + */ + \do_action( 'import_end' ); + + echo '

' . \esc_html__( 'Import Complete', 'activitypub' ) . '

'; + + \printf( + '

%s

', + \esc_html( + \sprintf( + /* translators: %s: Number of domains */ + \_n( 'Imported %s domain.', 'Imported %s domains.', $imported, 'activitypub' ), + \number_format_i18n( $imported ) + ) + ) + ); + + if ( $skipped > 0 ) { + \printf( + '

%s

', + \esc_html( + \sprintf( + /* translators: %s: Number of domains */ + \_n( 'Skipped %s domain (already blocked).', 'Skipped %s domains (already blocked).', $skipped, 'activitypub' ), + \number_format_i18n( $skipped ) + ) + ) + ); + } + + \printf( + '

%s

', + \esc_url( \admin_url( 'options-general.php?page=activitypub&tab=settings#moderation' ) ), + \esc_html__( 'View blocked domains in settings', 'activitypub' ) + ); + } + + /** + * Parse a CSV file and extract domain names. + * + * Supports Mastodon CSV format (with #domain header) and simple + * one-domain-per-line format. + * + * @param string $file_path Path to the CSV file. + * @return array Array of unique, valid domain names. + */ + public static function parse_csv( $file_path ) { + if ( ! \file_exists( $file_path ) || ! \is_readable( $file_path ) ) { + return array(); + } + + // phpcs:ignore WordPress.WP.AlternativeFunctions.file_get_contents_file_get_contents -- Reading local file. + $content = \file_get_contents( $file_path ); + if ( false === $content ) { + return array(); + } + + return self::parse_csv_string( $content ); + } + + /** + * Parse CSV content from a string and extract domain names. + * + * Supports Mastodon CSV format (with #domain header) and simple + * one-domain-per-line format. + * + * @param string $content CSV content as a string. + * @return array Array of unique, valid domain names. + */ + public static function parse_csv_string( $content ) { + $domains = array(); + + if ( empty( $content ) ) { + return $domains; + } + + // Split into lines. + $lines = \preg_split( '/\r\n|\r|\n/', $content ); + if ( empty( $lines ) ) { + return $domains; + } + + // Parse first line to detect format. + $first_line = \str_getcsv( $lines[0] ); + $first_cell = \trim( $first_line[0] ?? '' ); + $has_header = \str_starts_with( $first_cell, '#' ) || 'domain' === \strtolower( $first_cell ); + + // Find domain column index. + $domain_index = 0; + if ( $has_header ) { + foreach ( $first_line as $i => $col ) { + $col = \ltrim( \strtolower( \trim( $col ) ), '#' ); + if ( 'domain' === $col ) { + $domain_index = $i; + break; + } + } + // Remove header from lines. + \array_shift( $lines ); + } + + // Process each line. 
+ foreach ( $lines as $line ) { + $row = \str_getcsv( $line ); + $domain = \trim( $row[ $domain_index ] ?? '' ); + + // Skip empty lines and comments. + if ( empty( $domain ) || \str_starts_with( $domain, '#' ) ) { + continue; + } + + if ( self::is_valid_domain( $domain ) ) { + $domains[] = \strtolower( $domain ); + } + } + + return \array_unique( $domains ); + } + + /** + * Validate a domain name. + * + * @param string $domain The domain to validate. + * @return bool True if valid, false otherwise. + */ + private static function is_valid_domain( $domain ) { + // Must contain at least one dot (filter_var would accept "localhost"). + if ( ! \str_contains( $domain, '.' ) ) { + return false; + } + + return (bool) \filter_var( $domain, FILTER_VALIDATE_DOMAIN, FILTER_FLAG_HOSTNAME ); + } +} diff --git a/includes/wp-admin/import/load.php b/includes/wp-admin/import/load.php index cc1aba7f45..7352174224 100644 --- a/includes/wp-admin/import/load.php +++ b/includes/wp-admin/import/load.php @@ -27,6 +27,13 @@ function load() { array( __NAMESPACE__ . '\Mastodon', 'dispatch' ) ); + \register_importer( + 'blocklist', + \__( 'Domain Blocklist', 'activitypub' ), + \__( 'Import a domain blocklist in CSV format (Mastodon, IFTAS DNI, etc.)', 'activitypub' ), + array( __NAMESPACE__ . '\Blocklist', 'dispatch' ) + ); + if ( '1' === \get_option( 'activitypub_following_ui', '0' ) ) { \register_importer( 'starter-kit', diff --git a/tests/phpunit/tests/includes/wp-admin/import/class-test-blocklist.php b/tests/phpunit/tests/includes/wp-admin/import/class-test-blocklist.php new file mode 100644 index 0000000000..7a6583414a --- /dev/null +++ b/tests/phpunit/tests/includes/wp-admin/import/class-test-blocklist.php @@ -0,0 +1,389 @@ +temp_files as $file ) { + if ( \file_exists( $file ) ) { + \wp_delete_file( $file ); + } + } + $this->temp_files = array(); + + parent::tear_down(); + } + + /** + * Create a temporary CSV file with given content. + * + * @param string $content The file content. 
+ * @return string The path to the temporary file. + */ + private function create_temp_csv( $content ) { + $file = \wp_tempnam( 'blocklist-test-' ); + // phpcs:ignore WordPress.WP.AlternativeFunctions.file_system_operations_file_put_contents + \file_put_contents( $file, $content ); + $this->temp_files[] = $file; + + return $file; + } + + /** + * Test parsing Mastodon CSV format with #domain header. + * + * @covers ::parse_csv + */ + public function test_parse_csv_mastodon_format() { + $csv_content = "#domain,#severity,#public_comment,#private_comment\n"; + $csv_content .= "example.com,suspend,\"Spam\",\"\"\n"; + $csv_content .= "bad.org,silence,\"Abuse\",\"Internal note\"\n"; + $csv_content .= "spam.net,suspend,\"\",\"\"\n"; + + $file = $this->create_temp_csv( $csv_content ); + $domains = Blocklist::parse_csv( $file ); + + $this->assertCount( 3, $domains ); + $this->assertContains( 'example.com', $domains ); + $this->assertContains( 'bad.org', $domains ); + $this->assertContains( 'spam.net', $domains ); + } + + /** + * Test parsing Mastodon CSV format with domain column not in first position. + * + * @covers ::parse_csv + */ + public function test_parse_csv_mastodon_format_domain_not_first() { + $csv_content = "#severity,#domain,#public_comment\n"; + $csv_content .= "suspend,example.com,\"Spam\"\n"; + $csv_content .= "silence,bad.org,\"Abuse\"\n"; + + $file = $this->create_temp_csv( $csv_content ); + $domains = Blocklist::parse_csv( $file ); + + $this->assertCount( 2, $domains ); + $this->assertContains( 'example.com', $domains ); + $this->assertContains( 'bad.org', $domains ); + } + + /** + * Test parsing simple domain-per-line format. 
+ * + * @covers ::parse_csv + */ + public function test_parse_csv_simple_format() { + $csv_content = "example.com\n"; + $csv_content .= "bad.org\n"; + $csv_content .= "spam.net\n"; + + $file = $this->create_temp_csv( $csv_content ); + $domains = Blocklist::parse_csv( $file ); + + $this->assertCount( 3, $domains ); + $this->assertContains( 'example.com', $domains ); + $this->assertContains( 'bad.org', $domains ); + $this->assertContains( 'spam.net', $domains ); + } + + /** + * Test parsing CSV with 'domain' header (without #). + * + * @covers ::parse_csv + */ + public function test_parse_csv_domain_header_without_hash() { + $csv_content = "domain,comment\n"; + $csv_content .= "example.com,\"Test domain\"\n"; + $csv_content .= "bad.org,\"Another domain\"\n"; + + $file = $this->create_temp_csv( $csv_content ); + $domains = Blocklist::parse_csv( $file ); + + $this->assertCount( 2, $domains ); + $this->assertContains( 'example.com', $domains ); + $this->assertContains( 'bad.org', $domains ); + } + + /** + * Test that duplicate domains are removed. + * + * @covers ::parse_csv + */ + public function test_parse_csv_removes_duplicates() { + $csv_content = "example.com\n"; + $csv_content .= "bad.org\n"; + $csv_content .= "example.com\n"; + $csv_content .= "Example.Com\n"; // Should be treated as duplicate (case-insensitive). + + $file = $this->create_temp_csv( $csv_content ); + $domains = Blocklist::parse_csv( $file ); + + $this->assertCount( 2, $domains ); + $this->assertContains( 'example.com', $domains ); + $this->assertContains( 'bad.org', $domains ); + } + + /** + * Test that domains are normalized to lowercase. 
+ * + * @covers ::parse_csv + */ + public function test_parse_csv_normalizes_lowercase() { + $csv_content = "Example.COM\n"; + $csv_content .= "BAD.org\n"; + $csv_content .= "Spam.NET\n"; + + $file = $this->create_temp_csv( $csv_content ); + $domains = Blocklist::parse_csv( $file ); + + $this->assertCount( 3, $domains ); + $this->assertContains( 'example.com', $domains ); + $this->assertContains( 'bad.org', $domains ); + $this->assertContains( 'spam.net', $domains ); + $this->assertNotContains( 'Example.COM', $domains ); + } + + /** + * Test that comment lines are skipped. + * + * @covers ::parse_csv + */ + public function test_parse_csv_skips_comments() { + $csv_content = "# This is a comment\n"; + $csv_content .= "example.com\n"; + $csv_content .= "# Another comment\n"; + $csv_content .= "bad.org\n"; + + $file = $this->create_temp_csv( $csv_content ); + $domains = Blocklist::parse_csv( $file ); + + $this->assertCount( 2, $domains ); + $this->assertContains( 'example.com', $domains ); + $this->assertContains( 'bad.org', $domains ); + } + + /** + * Test that empty lines are skipped. + * + * @covers ::parse_csv + */ + public function test_parse_csv_skips_empty_lines() { + $csv_content = "example.com\n"; + $csv_content .= "\n"; + $csv_content .= " \n"; + $csv_content .= "bad.org\n"; + + $file = $this->create_temp_csv( $csv_content ); + $domains = Blocklist::parse_csv( $file ); + + $this->assertCount( 2, $domains ); + $this->assertContains( 'example.com', $domains ); + $this->assertContains( 'bad.org', $domains ); + } + + /** + * Test that invalid domains are skipped. + * + * @covers ::parse_csv + */ + public function test_parse_csv_skips_invalid_domains() { + $csv_content = "example.com\n"; + $csv_content .= "notadomain\n"; // No dot. + $csv_content .= "invalid domain.com\n"; // Space. 
+ $csv_content .= "bad.org\n"; + + $file = $this->create_temp_csv( $csv_content ); + $domains = Blocklist::parse_csv( $file ); + + $this->assertCount( 2, $domains ); + $this->assertContains( 'example.com', $domains ); + $this->assertContains( 'bad.org', $domains ); + $this->assertNotContains( 'notadomain', $domains ); + } + + /** + * Test parsing empty file. + * + * @covers ::parse_csv + */ + public function test_parse_csv_empty_file() { + $file = $this->create_temp_csv( '' ); + $domains = Blocklist::parse_csv( $file ); + + $this->assertEmpty( $domains ); + } + + /** + * Test parsing non-existent file. + * + * @covers ::parse_csv + */ + public function test_parse_csv_nonexistent_file() { + $domains = Blocklist::parse_csv( '/nonexistent/path/to/file.csv' ); + + $this->assertEmpty( $domains ); + } + + /** + * Test parsing file with only header. + * + * @covers ::parse_csv + */ + public function test_parse_csv_only_header() { + $csv_content = "#domain,#severity,#public_comment\n"; + + $file = $this->create_temp_csv( $csv_content ); + $domains = Blocklist::parse_csv( $file ); + + $this->assertEmpty( $domains ); + } + + /** + * Test parsing file with whitespace around domains. + * + * @covers ::parse_csv + */ + public function test_parse_csv_trims_whitespace() { + $csv_content = " example.com \n"; + $csv_content .= " bad.org \n"; + $csv_content .= " spam.net\n"; + + $file = $this->create_temp_csv( $csv_content ); + $domains = Blocklist::parse_csv( $file ); + + $this->assertCount( 3, $domains ); + $this->assertContains( 'example.com', $domains ); + $this->assertContains( 'bad.org', $domains ); + $this->assertContains( 'spam.net', $domains ); + } + + /** + * Test parsing subdomain. 
+ * + * @covers ::parse_csv + */ + public function test_parse_csv_with_subdomains() { + $csv_content = "sub.example.com\n"; + $csv_content .= "deep.sub.example.org\n"; + $csv_content .= "www.test.net\n"; + + $file = $this->create_temp_csv( $csv_content ); + $domains = Blocklist::parse_csv( $file ); + + $this->assertCount( 3, $domains ); + $this->assertContains( 'sub.example.com', $domains ); + $this->assertContains( 'deep.sub.example.org', $domains ); + $this->assertContains( 'www.test.net', $domains ); + } + + /** + * Test parsing large file with many domains. + * + * @covers ::parse_csv + */ + public function test_parse_csv_large_file() { + $csv_content = "#domain\n"; + for ( $i = 0; $i < 1000; $i++ ) { + $csv_content .= "domain{$i}.example.com\n"; + } + + $file = $this->create_temp_csv( $csv_content ); + $domains = Blocklist::parse_csv( $file ); + + $this->assertCount( 1000, $domains ); + $this->assertContains( 'domain0.example.com', $domains ); + $this->assertContains( 'domain999.example.com', $domains ); + } + + /** + * Test parsing domains with hyphens. + * + * @covers ::parse_csv + */ + public function test_parse_csv_domains_with_hyphens() { + $csv_content = "my-example.com\n"; + $csv_content .= "another-test-domain.org\n"; + + $file = $this->create_temp_csv( $csv_content ); + $domains = Blocklist::parse_csv( $file ); + + $this->assertCount( 2, $domains ); + $this->assertContains( 'my-example.com', $domains ); + $this->assertContains( 'another-test-domain.org', $domains ); + } + + /** + * Test that domain starting with hyphen is rejected. 
+ * + * @covers ::parse_csv + */ + public function test_parse_csv_rejects_domain_starting_with_hyphen() { + $csv_content = "-invalid.com\n"; + $csv_content .= "valid.com\n"; + + $file = $this->create_temp_csv( $csv_content ); + $domains = Blocklist::parse_csv( $file ); + + $this->assertCount( 1, $domains ); + $this->assertContains( 'valid.com', $domains ); + $this->assertNotContains( '-invalid.com', $domains ); + } + + /** + * Test that email-like identifiers are skipped gracefully. + * + * @covers ::parse_csv + */ + public function test_parse_csv_skips_email_like_identifiers() { + $csv_content = "user@example.com\n"; + $csv_content .= "admin@bad.org\n"; + $csv_content .= "valid.org\n"; + $csv_content .= "@invalid.net\n"; + + $file = $this->create_temp_csv( $csv_content ); + $domains = Blocklist::parse_csv( $file ); + + $this->assertSame( array( 'valid.org' ), $domains ); + } + + /** + * Test that email-like identifiers in Mastodon CSV format are skipped gracefully. + * + * @covers ::parse_csv + */ + public function test_parse_csv_mastodon_format_skips_email_like_identifiers() { + $csv_content = "#domain,#severity,#public_comment\n"; + $csv_content .= "user@example.com,suspend,\"Test\"\n"; + $csv_content .= "valid.org,silence,\"Test\"\n"; + $csv_content .= "admin@bad.org,suspend,\"Test\"\n"; + + $file = $this->create_temp_csv( $csv_content ); + $domains = Blocklist::parse_csv( $file ); + + $this->assertSame( array( 'valid.org' ), $domains ); + } +}