Skip to content

fix: upstream provider fixes #711

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Dec 1, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 4 additions & 35 deletions src/mb_enhanced_cover_art_uploads/providers/amazon.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@ import { ArtworkTypeIDs } from '@lib/MB/CoverArt';
import { assertNonNull } from '@lib/util/assert';
import { parseDOM, qsMaybe } from '@lib/util/dom';
import { safeParseJSON } from '@lib/util/json';
import { urlJoin } from '@lib/util/urls';

import type { CoverArt } from '../types';
import { CoverArtProvider } from './base';
Expand Down Expand Up @@ -36,13 +35,10 @@ const VARIANT_TYPE_MAPPING: Record<string, ArtworkTypeIDs | undefined> = {

// CSS queries to figure out which type of page we're on
const AUDIBLE_PAGE_QUERY = '#audibleProductTitle'; // Product title with Audible logo on standard product pages
const DIGITAL_PAGE_QUERY = '.DigitalMusicDetailPage'; // TODO: Does this still exist?
const MUSIC_DIGITAL_PAGE_QUERY = '#nav-global-location-data-modal-action[data-a-modal*="dmusicRetailMp3Player"]'; // Dynamically loaded Amazon Music digital pages.
const PHYSICAL_AUDIOBOOK_PAGE_QUERY = '#booksImageBlock_feature_div'; // A match selects the physical audiobook image finder.

// CSS queries to extract a front cover from a page
const AUDIBLE_FRONT_IMAGE_QUERY = '#mf_pdp_hero_widget_book_img img'; // Only for /hz/audible/mlp/mfpdp pages.
const DIGITAL_FRONT_IMAGE_QUERY = '#digitalMusicProductImage_feature_div > img'; // Front cover image on digital release pages.
const AUDIBLE_FRONT_IMAGE_QUERY = '#audibleimageblock_feature_div #main-image'; // Only for pages that have Audible releases.

export class AmazonProvider extends CoverArtProvider {
public readonly supportedDomains = [
Expand Down Expand Up @@ -70,21 +66,16 @@ export class AmazonProvider extends CoverArtProvider {
throw new Error('Amazon served a captcha page');
}

let finder: typeof this.findDigitalImages;
let finder: (url: URL, pageContent: string, pageDom: Document) => Promise<CoverArt[]>;

/* eslint-disable @typescript-eslint/unbound-method -- Bound further down */
if (qsMaybe(AUDIBLE_PAGE_QUERY, pageDom)) {
LOGGER.debug('Searching for images in Audible page');
finder = this.findAudibleImages;
} else if (qsMaybe(DIGITAL_PAGE_QUERY, pageDom)) {
LOGGER.debug('Searching for images in digital release page');
finder = this.findDigitalImages;
} else if (qsMaybe(MUSIC_DIGITAL_PAGE_QUERY, pageDom)) {
// Amazon made it really difficult to extract images from these sort
// of pages, so we don't support it for now.
throw new Error('Amazon Music releases are currently not supported. Please use a different provider or copy the image URL manually.');
} else if (qsMaybe(PHYSICAL_AUDIOBOOK_PAGE_QUERY, pageDom)) {
LOGGER.debug('Searching for images in physical audiobook page');
finder = this.findPhysicalAudiobookImages;
} else {
LOGGER.debug('Searching for images in generic physical page');
finder = this.findGenericPhysicalImages;
Expand All @@ -105,29 +96,7 @@ export class AmazonProvider extends CoverArtProvider {
});
}

/**
 * Find images on a physical audiobook page.
 *
 * Hands the raw page content to `extractEmbeddedJSImages` together with a
 * pattern matching the `'imageGalleryData'` entry in the page's embedded
 * JavaScript, and turns each entry's `mainUrl` into a cover art URL.
 *
 * @param _url Unused.
 * @param pageContent Raw content of the page.
 * @returns One untyped cover art entry per extracted image.
 * @throws If no images could be extracted from the embedded JS.
 */
private async findPhysicalAudiobookImages(_url: URL, pageContent: string): Promise<CoverArt[]> {
    const galleryPattern = /\s*'imageGalleryData' : (.+),$/m;
    const galleryImages = this.extractEmbeddedJSImages(pageContent, galleryPattern) as Array<{ mainUrl: string }> | null;
    assertNonNull(galleryImages, 'Failed to extract images from embedded JS on physical audiobook page');

    // These pages embed no image variants, so the artwork types are unknown
    // and the entries are returned without a `types` property.
    const covers: CoverArt[] = [];
    for (const { mainUrl } of galleryImages) {
        covers.push({ url: new URL(mainUrl) });
    }
    return covers;
}

/**
 * Find images on a digital release page.
 *
 * Delegates to `extractFrontCover`, using `DIGITAL_FRONT_IMAGE_QUERY` to
 * locate the image in the parsed page.
 *
 * @param _url Unused.
 * @param _pageContent Unused.
 * @param pageDom Parsed DOM of the page.
 * @returns Whatever `extractFrontCover` yields for the digital front image query.
 */
private async findDigitalImages(_url: URL, _pageContent: string, pageDom: Document): Promise<CoverArt[]> {
    const frontCover = this.extractFrontCover(pageDom, DIGITAL_FRONT_IMAGE_QUERY);
    return frontCover;
}

private async findAudibleImages(url: URL, _pageContent: string, pageDom: Document): Promise<CoverArt[]> {
// We can only extract 500px images from standard product pages. Prefer
// /hz/audible/mlp/mfpdp pages which should have the same image in its
// full resolution.
if (/\/(?:gp\/product|dp)\//.test(url.pathname)) {
const audibleUrl = urlJoin(url.origin, '/hz/audible/mlp/mfpdp/', this.extractId(url)!);
const audibleContent = await this.fetchPage(audibleUrl);
const audibleDom = parseDOM(audibleContent, audibleUrl.href);
return this.findAudibleImages(audibleUrl, audibleContent, audibleDom);
}

private async findAudibleImages(_url: URL, _pageContent: string, pageDom: Document): Promise<CoverArt[]> {
return this.extractFrontCover(pageDom, AUDIBLE_FRONT_IMAGE_QUERY);
}

Expand Down
50 changes: 2 additions & 48 deletions src/mb_enhanced_cover_art_uploads/providers/audiomack.ts
Original file line number Diff line number Diff line change
@@ -1,56 +1,10 @@
import { ArtworkTypeIDs } from '@lib/MB/CoverArt';
import { assertDefined, assertHasValue } from '@lib/util/assert';
import { safeParseJSON } from '@lib/util/json';
import { HeadMetaPropertyProvider } from './base';

import type { CoverArt } from '../types';
import { CoverArtProvider } from './base';

/** Music information carried by an album or song entry of the state. */
type AudiomackMusicInfo = {
    image: string;
};

/**
 * Subset of the Audiomack page state that is read to find the cover image.
 *
 * Depending on the URL, the `info` property of one of the two entries will
 * be `null`.
 */
interface AudiomackState {
    musicAlbum: { info: AudiomackMusicInfo | null };
    musicSong: { info: AudiomackMusicInfo | null };
}

export class AudiomackProvider extends CoverArtProvider {
export class AudiomackProvider extends HeadMetaPropertyProvider {
public readonly supportedDomains = ['audiomack.com'];
public readonly name = 'Audiomack';
public readonly favicon = 'https://audiomack.com/static/favicon-32x32.png';
// /song/ URLs may or may not be singles. We'll include song or album in the
// ID to prevent unsafe redirects from one to the other.
protected readonly urlRegex = /\.com\/([^/]+\/(?:song|album)\/[^/?#]+)/;

public async findImages(url: URL): Promise<CoverArt[]> {
const pageContent = await this.fetchPage(url, {
headers: {
// Audiomack loads all of the info dynamically when in a browser,
// and those requests require OAuth.
// However, it returns all info statically for CLI tools like
// curl and wget, and for crawlers like Google. Impersonate one
// so we don't have to deal with OAuth.
'User-Agent': 'Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)',
},
});
const initialStateText = pageContent.match(/window\.__INITIAL_STATE__ = (.+);\s*$/m)?.[1];
assertDefined(initialStateText, 'Could not parse Audiomack state from page');
const initialState = safeParseJSON<AudiomackState>(initialStateText);

const info = initialState?.musicAlbum.info ?? initialState?.musicSong.info;
assertHasValue(info, 'Could not retrieve music information from state');

// Albums can have track images, but those tracks could be singles, so
// we won't extract them.
return [{
url: new URL(info.image),
types: [ArtworkTypeIDs.Front],
}];
}
}
Loading