|
| 1 | +<?php |
| 2 | + |
/**
 * Bridge producing a feed of WWF Australia news or blog posts.
 *
 * The list of posts comes from the Algolia search index queried below; the
 * full body of each post is then read from the site's Next.js data endpoint
 * (`/_next/data/<buildId>/...json`) and converted from its rich-text JSON
 * tree to HTML. When the full body is unavailable the summary text supplied
 * by the search hit is used instead.
 */
class WWFAustraliaBridge extends BridgeAbstract
{
    const NAME = 'WWF Australia';
    const URI = 'https://wwf.org.au/';
    const DESCRIPTION = 'Latest WWF Australia news or blogs with full article content.';
    const MAINTAINER = 'Scrub000';
    const CACHE_TIMEOUT = 3600;

    const PARAMETERS = [
        [
            'type' => [
                'name' => 'Content Type',
                'type' => 'list',
                'values' => [
                    'News' => 'news',
                    'Blogs' => 'blogs',
                ],
                'defaultValue' => 'news',
            ],
        ],
    ];

    /**
     * Collects the latest posts of the selected content type into $this->items.
     */
    public function collectData()
    {
        $type = $this->getInput('type');
        $buildId = $this->fetchBuildId($type);

        foreach ($this->fetchHits($type) as $hit) {
            // A hit without a URL cannot become a feed item (no link, no slug).
            if (!is_array($hit) || empty($hit['url'])) {
                continue;
            }

            $this->items[] = $this->buildItem($hit, $type, $buildId);
        }
    }

    /**
     * Reads the Next.js build id from the listing page's __NEXT_DATA__ script.
     *
     * The build id is part of every `/_next/data/` URL requested later.
     *
     * @param string $type Selected content type ('news' or 'blogs').
     * @return string The build id; a server error is raised when none is found.
     */
    private function fetchBuildId($type)
    {
        $mainPage = getSimpleHTMLDOM(self::URI . $type . '/');

        foreach ($mainPage->find('script#__NEXT_DATA__') as $scriptTag) {
            $json = json_decode($scriptTag->innertext, true);
            if (!empty($json['buildId'])) {
                return $json['buildId'];
            }
        }

        returnServerError('Unable to extract Next.js buildId from main page');
    }

    /**
     * Queries the search index for the ten most recent records of the type.
     *
     * @param string $type Selected content type ('news' or 'blogs').
     * @return array List of hit records as decoded from the JSON response.
     */
    private function fetchHits($type)
    {
        $apiUrl = 'https://291t4y9i4t-dsn.algolia.net/1/indexes/wwf_website_prod_date_sorted/query';
        // NOTE(review): the body is JSON although the declared content type is
        // form-urlencoded; kept as in the original request — confirm before changing.
        $headers = [
            'x-algolia-api-key: dd06aa34e50cc3f27dbd8fda34e27b88',
            'x-algolia-application-id: 291T4Y9I4T',
            'content-type: application/x-www-form-urlencoded',
        ];

        $recordType = $type === 'blogs' ? 'pageBlog' : 'pageNews';

        $postData = json_encode([
            'query' => '',
            'hitsPerPage' => 10,
            'filters' => "recordType:'$recordType'",
            'attributesToHighlight' => [],
            'attributesToSnippet' => [],
            'analyticsTags' => [],
        ]);

        // Use the framework HTTP helper (already used for the article requests)
        // rather than a raw file_get_contents() stream, so every request made
        // by this bridge goes through the same client.
        $response = getContents($apiUrl, $headers, [
            CURLOPT_POST => true,
            CURLOPT_POSTFIELDS => $postData,
        ]);

        $data = json_decode($response, true);

        if (!is_array($data) || !isset($data['hits']) || !is_array($data['hits'])) {
            returnServerError('Failed to fetch data from WWF API');
        }

        return $data['hits'];
    }

    /**
     * Builds one feed item from a search hit, enriched with the full article.
     *
     * @param array  $hit     Search hit; must contain a non-empty 'url'.
     * @param string $type    Selected content type ('news' or 'blogs').
     * @param string $buildId Next.js build id used in the article data URL.
     * @return array Feed item with uri, title, categories, content and,
     *               when the published date parses, timestamp.
     */
    private function buildItem(array $hit, $type, $buildId)
    {
        $item = [
            'uri' => $hit['url'],
            'title' => $hit['title'] ?? '',
            'categories' => $this->extractCategories($hit['tags'] ?? []),
        ];

        // strtotime() yields false for a missing/unparseable date; omit the
        // timestamp in that case instead of storing false.
        $timestamp = strtotime((string) ($hit['publishedDate'] ?? ''));
        if ($timestamp !== false) {
            $item['timestamp'] = $timestamp;
        }

        $articleItem = $this->fetchArticle($hit, $type, $buildId);
        $bodyContent = $articleItem['bodyContent'] ?? [];

        // Index the entries referenced from the rich text by id so embedded
        // blocks and entry links can be resolved while rendering.
        $linkedEntries = [];
        foreach (['block', 'hyperlink'] as $group) {
            foreach ((array) ($bodyContent['links']['entries'][$group] ?? []) as $entry) {
                // Skip null/incomplete entries rather than indexing them under a bogus key.
                if (isset($entry['sys']['id'])) {
                    $linkedEntries[$entry['sys']['id']] = $entry;
                }
            }
        }

        $fullContent = '';
        if (isset($bodyContent['json']) && is_array($bodyContent['json'])) {
            $fullContent = $this->renderRichText($bodyContent['json'], $linkedEntries);
        }

        // Lead image: the article's hero image wins over the search hit's thumbnail.
        $image = '';
        $hero = $articleItem['hero']['imageSource'][0] ?? [];
        if (!empty($hero['secure_url'])) {
            $altText = $hero['context']['custom']['alt'] ?? '';
            $image = '<img src="' . htmlspecialchars($hero['secure_url'])
                . '" alt="' . htmlspecialchars($altText) . '" /><br>';
        } elseif (!empty($hit['imageUrl'])) {
            $image = '<img src="' . htmlspecialchars($hit['imageUrl']) . '" alt="" /><br>';
        }

        $item['content'] = $image . ($fullContent !== '' ? $fullContent : ($hit['content'] ?? ''));

        return $item;
    }

    /**
     * Fetches the article payload for a hit from the Next.js data endpoint.
     *
     * @param array  $hit     Search hit; 'url' supplies the slug and, for news,
     *                        'publishedYear' supplies the year path segment.
     * @param string $type    Selected content type ('news' or 'blogs').
     * @param string $buildId Next.js build id.
     * @return array The first page item of the payload, or an empty array when
     *               it cannot be fetched or has an unexpected shape.
     */
    private function fetchArticle(array $hit, $type, $buildId)
    {
        $slug = basename($hit['url']);

        if ($type === 'blogs') {
            $path = "blogs/$slug";
        } elseif (!empty($hit['publishedYear'])) {
            $path = "news/{$hit['publishedYear']}/$slug";
        } else {
            // A news data URL cannot be built without the year segment.
            return [];
        }

        $jsonUrl = self::URI . "_next/data/$buildId/$path.json";

        try {
            $jsonArticle = json_decode(getContents($jsonUrl), true);
        } catch (\Exception $e) {
            // One unreachable article must not take down the whole feed; the
            // caller falls back to the summary carried by the search hit.
            return [];
        }

        $articleItem = $jsonArticle['pageProps']['pagePayload']['page']['items'][0] ?? null;

        return is_array($articleItem) ? $articleItem : [];
    }

    /**
     * Turns a hit's tag list into human-readable category names.
     *
     * A tag is either a plain string or an array carrying a 'key'; hyphens
     * become spaces and each word is capitalised. Empty results are dropped.
     *
     * @param mixed $tags Tag list from the search hit.
     * @return array List of category strings.
     */
    private function extractCategories($tags)
    {
        $categories = [];

        foreach ((array) $tags as $tag) {
            $raw = is_array($tag) ? ($tag['key'] ?? '') : $tag;
            $label = ucwords(str_replace('-', ' ', (string) $raw));
            if ($label !== '') {
                $categories[] = $label;
            }
        }

        return $categories;
    }

    /**
     * Returns the child nodes of a rich-text node, or an empty list.
     *
     * @param mixed $node Rich-text node (expected: array with a 'content' list).
     * @return array
     */
    private function childNodes($node)
    {
        $children = is_array($node) ? ($node['content'] ?? null) : null;

        return is_array($children) ? $children : [];
    }

    /**
     * Renders the block-level children of a rich-text node to HTML.
     *
     * Handles paragraphs, headings 1-6, ordered/unordered lists, blockquotes,
     * horizontal rules and embedded entry blocks; other node types are skipped.
     *
     * @param array $json          Node whose 'content' holds block nodes.
     * @param array $linkedEntries Linked entries keyed by their sys id.
     * @return string HTML fragment.
     */
    private function renderRichText($json, $linkedEntries = [])
    {
        $html = '';

        foreach ($this->childNodes($json) as $node) {
            $nodeType = is_array($node) ? ($node['nodeType'] ?? '') : '';

            switch ($nodeType) {
                case 'paragraph':
                case 'heading-1':
                case 'heading-2':
                case 'heading-3':
                case 'heading-4':
                case 'heading-5':
                case 'heading-6':
                    // 'heading-N' maps to <hN>; the level is the last character.
                    $tag = $nodeType === 'paragraph' ? 'p' : 'h' . substr($nodeType, -1);

                    $segment = '';
                    foreach ($this->childNodes($node) as $inline) {
                        $segment .= $this->renderInlineNode($inline, $linkedEntries);
                    }

                    $html .= "<$tag>$segment</$tag>";
                    break;

                case 'unordered-list':
                case 'ordered-list':
                    $tag = $nodeType === 'ordered-list' ? 'ol' : 'ul';

                    // Each list item holds block nodes of its own, so recurse.
                    $listItems = '';
                    foreach ($this->childNodes($node) as $listItem) {
                        $listItems .= '<li>' . $this->renderRichText($listItem, $linkedEntries) . '</li>';
                    }

                    $html .= "<$tag>$listItems</$tag>";
                    break;

                case 'blockquote':
                    $html .= '<blockquote>' . $this->renderRichText($node, $linkedEntries) . '</blockquote>';
                    break;

                case 'hr':
                    $html .= '<hr>';
                    break;

                case 'embedded-entry-block':
                    $html .= $this->renderEmbeddedEntry($node, $linkedEntries);
                    break;
            }
        }

        return $html;
    }

    /**
     * Renders an embedded entry block (image block or single image) to HTML.
     *
     * @param array $node          The 'embedded-entry-block' node.
     * @param array $linkedEntries Linked entries keyed by their sys id.
     * @return string HTML for the entry's images; '' for unknown or
     *                unresolvable entries.
     */
    private function renderEmbeddedEntry($node, $linkedEntries)
    {
        $entryId = $node['data']['target']['sys']['id'] ?? '';
        $block = $linkedEntries[$entryId] ?? null;

        if (!is_array($block)) {
            return '';
        }

        $images = [];
        $typename = $block['__typename'] ?? '';

        if ($typename === 'ImageBlock') {
            foreach ((array) ($block['imagesCollection']['items'] ?? []) as $imageItem) {
                $images[] = $imageItem['imageSource'][0] ?? null;
            }
        } elseif ($typename === 'MediaImage') {
            $images[] = $block['imageSource'][0] ?? null;
        }

        $html = '';
        foreach ($images as $image) {
            if (is_array($image) && $image) {
                $html .= $this->renderImageHtml($image);
            }
        }

        return $html;
    }

    /**
     * Renders one inline rich-text node (text, hyperlink, entry link) to HTML.
     *
     * @param array $inline        Inline node.
     * @param array $linkedEntries Linked entries keyed by their sys id.
     * @return string HTML fragment; text values and URLs are HTML-escaped.
     */
    private function renderInlineNode($inline, $linkedEntries)
    {
        $nodeType = is_array($inline) ? ($inline['nodeType'] ?? '') : '';

        if ($nodeType === 'text') {
            // Mark type => wrapping HTML element.
            $markTags = [
                'bold' => 'strong',
                'italic' => 'em',
                'underline' => 'u',
                'code' => 'code',
                'superscript' => 'sup',
                'subscript' => 'sub',
                'strikethrough' => 's',
            ];

            $text = htmlspecialchars((string) ($inline['value'] ?? ''));
            foreach ((array) ($inline['marks'] ?? []) as $mark) {
                $markType = is_array($mark) ? ($mark['type'] ?? '') : '';
                if (is_string($markType) && isset($markTags[$markType])) {
                    $wrap = $markTags[$markType];
                    $text = "<$wrap>$text</$wrap>";
                }
            }

            return $text;
        }

        // Everything else is a container: render its children first.
        $linkText = '';
        foreach ($this->childNodes($inline) as $linkNode) {
            $linkText .= $this->renderInlineNode($linkNode, $linkedEntries);
        }

        if ($nodeType === 'hyperlink') {
            $url = htmlspecialchars((string) ($inline['data']['uri'] ?? ''));

            return "<a href=\"$url\">$linkText</a>";
        }

        if ($nodeType === 'entry-hyperlink') {
            $entryId = $inline['data']['target']['sys']['id'] ?? '';
            $linkedEntry = $linkedEntries[$entryId] ?? null;

            if (is_array($linkedEntry) && isset($linkedEntry['slug'])) {
                // NOTE(review): every linked entry is addressed under 'blogs/';
                // links to non-blog entries may need another prefix — confirm.
                // The slug is escaped like any other value placed in an attribute.
                $href = htmlspecialchars(self::URI . 'blogs/' . $linkedEntry['slug']);

                return "<a href=\"$href\">$linkText</a>";
            }

            return $linkText;
        }

        // Unknown inline containers (e.g. other link kinds) keep their visible
        // text instead of vanishing; nodes without children yield ''.
        return $linkText;
    }

    /**
     * Renders an image source record as an <img> with optional caption/credit.
     *
     * @param array $image Image record; reads 'secure_url' and
     *                     'context.custom.{alt,credit,caption}'.
     * @return string HTML block, or '' when the record has no URL.
     */
    private function renderImageHtml($image)
    {
        $url = htmlspecialchars((string) ($image['secure_url'] ?? ''));

        // An <img> without a source is just a broken-image placeholder.
        if ($url === '') {
            return '';
        }

        $alt = htmlspecialchars((string) ($image['context']['custom']['alt'] ?? ''));
        $credit = htmlspecialchars((string) ($image['context']['custom']['credit'] ?? ''));
        $caption = htmlspecialchars((string) ($image['context']['custom']['caption'] ?? ''));

        $html = '<div style="margin: 1em 0;">';
        $html .= "<img src=\"$url\" alt=\"$alt\" style=\"max-width:100%;\" />";
        if ($caption || $credit) {
            $html .= '<p style="font-size: small; color: #555;">';
            if ($caption) {
                $html .= "<em>$caption</em><br>";
            }
            if ($credit) {
                $html .= "Credit: $credit";
            }
            $html .= '</p>';
        }
        $html .= '</div>';

        return $html;
    }
}
0 commit comments