From 4a846d1c68a92596b6540f0c75317d9662546e77 Mon Sep 17 00:00:00 2001
From: Nick Daugherty
Date: Fri, 2 Nov 2018 14:24:27 -0600
Subject: [PATCH 1/3] Add new function to query entries without using
 get_all_entries_asc()

This uses a direct get_comments() query (via $this->get() ) with a
date_query to find the relevant entries.

This means we can get entries without using the full cache from
get_all_entries_asc(), which doesn't scale beyond the number of entries
that fit inside a single cache entry
---
 classes/class-wpcom-liveblog-entry-query.php | 21 ++++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/classes/class-wpcom-liveblog-entry-query.php b/classes/class-wpcom-liveblog-entry-query.php
index baa9ac0a0..9094ffff0 100644
--- a/classes/class-wpcom-liveblog-entry-query.php
+++ b/classes/class-wpcom-liveblog-entry-query.php
@@ -171,6 +171,27 @@ public function get_between_timestamps( $start_timestamp, $end_timestamp ) {
 		return $this->find_between_timestamps( $all_entries, $start_timestamp, $end_timestamp );
 	}
 
+	/**
+	 * Get entries between two timestamps, using a Date Query.
+	 *
+	 * @param int $start_timestamp Unix timestamp of the range start (inclusive).
+	 * @param int $end_timestamp   Unix timestamp of the range end (inclusive).
+	 * @return array Matching entries, with replaced entries removed.
+	 */
+	public function get_between_timestamps_with_query( $start_timestamp, $end_timestamp ) {
+		$args = array(
+			'date_query' => array(
+				'after'     => date( 'c', $start_timestamp ),
+				'before'    => date( 'c', $end_timestamp ),
+				'inclusive' => true,
+			),
+		);
+
+		$entries = $this->get( $args );
+
+		return self::remove_replaced_entries( $entries );
+	}
+
 	public function has_any() {
 		return (bool) $this->get();
 	}

From 218ac9d095c389cec47fc20cc1b7237a00ab6569 Mon Sep 17 00:00:00 2001
From: Nick Daugherty
Date: Fri, 2 Nov 2018 14:25:18 -0600
Subject: [PATCH 2/3] Add function to count total entries

Previously we had to get all entries and count that array, which is
inefficient and doesn't scale beyond the size of a single cache entry
(usually 1MB)
---
 classes/class-wpcom-liveblog-entry-query.php | 47 ++++++++++++++++++++
 1 file changed, 47 insertions(+)

diff --git a/classes/class-wpcom-liveblog-entry-query.php b/classes/class-wpcom-liveblog-entry-query.php
index 9094ffff0..c01d530c8 100644
--- a/classes/class-wpcom-liveblog-entry-query.php
+++ b/classes/class-wpcom-liveblog-entry-query.php
@@ -192,6 +192,53 @@ public function get_between_timestamps_with_query( $start_timestamp, $end_timest
 		return self::remove_replaced_entries( $entries );
 	}
 
+	/**
+	 * Get the total number of entries for a Liveblog post
+	 *
+	 * The total is the count of _all_ entries, minus deletions
+	 *
+	 * Deletions are stored as regular comments, but with no content and a
+	 * "replaces" meta value
+	 *
+	 * NOTE - currently updates are counted as entries, which is probably wrong
+	 * and will throw off pagination, but this is designed to match the behavior
+	 * in WPCOM_Liveblog::flatten_entries(), which is also used for pagination calculation
+	 *
+	 * @return int
+	 */
+	public function count_entries() {
+		$latest_timestamp = $this->get_latest_timestamp();
+
+		$cache_key = $this->key . '_entries_count_' . $this->post_id . '_' . $latest_timestamp;
+
+		$count = wp_cache_get( $cache_key, 'liveblog' );
+
+		if ( false !== $count ) {
+			return (int) $count; // Cast: the cached value may be a numeric string.
+		}
+
+		// Count all comments, excluding deletions. get_var() yields a string or null, so cast to int.
+		global $wpdb;
+
+		$count = (int) $wpdb->get_var(
+			$wpdb->prepare(
+				"SELECT COUNT(*) as count FROM $wpdb->comments WHERE
+					comment_post_ID = %d
+					AND comment_type = %s
+					AND comment_approved = %s
+					AND comment_content != %s",
+				$this->post_id,
+				$this->key,
+				$this->key,
+				''
+			)
+		);
+
+		wp_cache_set( $cache_key, $count, 'liveblog' );
+
+		return $count;
+	}
+
 	public function has_any() {
 		return (bool) $this->get();
 	}

From 777b04251fbf31f7a0c127a01c507a13a2df197d Mon Sep 17 00:00:00 2001
From: Nick Daugherty
Date: Fri, 2 Nov 2018 14:28:16 -0600
Subject: [PATCH 3/3] Add a cached version of get_entries_by_time()

Better caching approach that scales out on larger Liveblogs, and
prevents redundant DB queries and processing on all Liveblogs
---
 liveblog.php | 93 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 93 insertions(+)

diff --git a/liveblog.php b/liveblog.php
index dd715fc53..916837060 100644
--- a/liveblog.php
+++ b/liveblog.php
@@ -477,6 +477,99 @@ public static function get_entries_by_time( $start_timestamp, $end_timestamp ) {
 		return $result;
 	}
 
+	/**
+	 * Get Liveblog entries between a start and end time for a post, with
+	 * cached results.
+	 *
+	 * NOTE - get_entries_by_time() is also cached, but it uses a full list
+	 * of all entries, via WPCOM_Liveblog_Entry_Query::get_all_entries_asc().
+	 * That approach breaks down when there are more entries than can fit in
+	 * a single cache key, so this function caches just the results
+	 *
+	 * This extra caching also bypasses the additional processing that
+	 * happens inside $entry->for_json(), which is _always_ at least one db
+	 * query (for the comment author user coming from get_comment_class()), and
+	 * the various filters and processing performed there
+	 *
+	 * @param int $start_timestamp The start time boundary
+	 * @param int $end_timestamp   The end time boundary
+	 *
+	 * @return array Result with 'entries', 'latest_timestamp', 'refresh_interval' and 'pages' keys.
+	 */
+	public static function get_entries_by_time_cached( $start_timestamp, $end_timestamp ) {
+		// Set some defaults
+		$latest_timestamp = null;
+		$entries_for_json = array();
+
+		$now = time();
+
+		// If end timestamp is in future, set a cache TTL until it's not
+		// Must do this to match behavior of get_entries_by_time()
+		if ( $end_timestamp > $now ) {
+			self::$cache_control_max_age = $end_timestamp - $now;
+		}
+
+		if ( empty( self::$entry_query ) ) {
+			self::$entry_query = new WPCOM_Liveblog_Entry_Query( self::$post_id, self::KEY );
+		}
+
+		$latest_timestamp = self::$entry_query->get_latest_timestamp();
+
+		// If the requested timestamp is later than the latest comment timestamp,
+		// normalize it to the latest comment timestamp to get a higher hitrate
+		if ( $end_timestamp > $latest_timestamp ) {
+			$end_timestamp = $latest_timestamp;
+		}
+
+		$cache_key = self::KEY . '_result_' . self::$post_id . '_' . $start_timestamp . '_' . $end_timestamp;
+
+		// Were they cached already?
+		$cached_result = wp_cache_get( $cache_key, 'liveblog' );
+
+		if ( false !== $cached_result ) {
+			return $cached_result;
+		}
+
+		// Get liveblog entries within the start and end boundaries
+		$entries = self::$entry_query->get_between_timestamps_with_query( $start_timestamp, $end_timestamp );
+
+		$pages    = false;
+		$per_page = WPCOM_Liveblog_Lazyloader::get_number_of_entries();
+
+		if ( ! empty( $entries ) ) {
+			$entries_count = self::$entry_query->count_entries();
+
+			/**
+			 * Loop through each liveblog entry, set the most recent timestamp, and
+			 * put the JSON data for each entry into a neat little array.
+			 */
+			foreach ( $entries as $entry ) {
+				$latest_timestamp   = max( $latest_timestamp, $entry->get_timestamp() );
+				$entries_for_json[] = $entry->for_json();
+			}
+
+			$pages = ceil( $entries_count / $per_page );
+		}
+
+		// Create the result array
+		$result = array(
+			'entries'          => $entries_for_json,
+			'latest_timestamp' => $latest_timestamp,
+			'refresh_interval' => self::get_refresh_interval(),
+			'pages'            => $pages,
+		);
+
+		if ( ! empty( $entries_for_json ) ) {
+			do_action( 'liveblog_entry_request', $result );
+		} else {
+			do_action( 'liveblog_entry_request_empty' );
+		}
+
+		wp_cache_set( $cache_key, $result, 'liveblog' );
+
+		return $result;
+	}
+
 	/**
 	 * Is a given post_id a liveblog enabled post?
 	 *