Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

RF: social tags checker #39

Merged
merged 2 commits into from
Jun 11, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 6 additions & 4 deletions checks/checks.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,16 +6,18 @@
)

// Checks groups the individual checkers so that callers can receive them as a
// single dependency. Each field is populated by NewChecks.
type Checks struct {
	Carbon     *Carbon
	Rank       *Rank
	SocialTags *SocialTags
}

func NewChecks() *Checks {
client := &http.Client{
Timeout: 5 * time.Second,
}
return &Checks{
Carbon: NewCarbon(client),
Rank: NewRank(client),
Carbon: NewCarbon(client),
Rank: NewRank(client),
SocialTags: NewSocialTags(client),

Check warning on line 21 in checks/checks.go

View check run for this annotation

Codecov / codecov/patch

checks/checks.go#L19-L21

Added lines #L19 - L21 were not covered by tests
}
}
94 changes: 94 additions & 0 deletions checks/social_tags.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
package checks

import (
"context"
"net/http"

"github.com/PuerkitoBio/goquery"
)

// SocialTagsData is the JSON-serialisable set of metadata values that
// GetSocialTags collects from a page. A field is the empty string when the
// corresponding element or attribute was not found.
type SocialTagsData struct {
	// General document metadata.
	Title        string `json:"title"`
	Description  string `json:"description"`
	Keywords     string `json:"keywords"`
	CanonicalUrl string `json:"canonicalUrl"`
	// Open Graph ("og:*") properties.
	OgTitle       string `json:"ogTitle"`
	OgType        string `json:"ogType"`
	OgImage       string `json:"ogImage"`
	OgUrl         string `json:"ogUrl"`
	OgDescription string `json:"ogDescription"`
	OgSiteName    string `json:"ogSiteName"`
	// Twitter card ("twitter:*") values.
	TwitterCard        string `json:"twitterCard"`
	TwitterSite        string `json:"twitterSite"`
	TwitterCreator     string `json:"twitterCreator"`
	TwitterTitle       string `json:"twitterTitle"`
	TwitterDescription string `json:"twitterDescription"`
	TwitterImage       string `json:"twitterImage"`
	// Remaining meta and link values.
	ThemeColor string `json:"themeColor"`
	Robots     string `json:"robots"`
	Googlebot  string `json:"googlebot"`
	Generator  string `json:"generator"`
	Viewport   string `json:"viewport"`
	Author     string `json:"author"`
	Publisher  string `json:"publisher"`
	Favicon    string `json:"favicon"`
}

// Empty reports whether s equals the zero value, i.e. every field is the
// empty string.
func (s SocialTagsData) Empty() bool {
	var zero SocialTagsData
	return s == zero
}

// SocialTags is the checker that retrieves a page and extracts its social
// metadata; see GetSocialTags.
type SocialTags struct {
	// client performs the outbound page request.
	client *http.Client
}

// NewSocialTags returns a SocialTags checker that sends its requests through
// the given client. The client is stored as-is, without copying or validation.
func NewSocialTags(client *http.Client) *SocialTags {
	checker := new(SocialTags)
	checker.client = client
	return checker
}

func (s *SocialTags) GetSocialTags(ctx context.Context, url string) (*SocialTagsData, error) {
req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil)
if err != nil {
return nil, err
}

Check warning on line 53 in checks/social_tags.go

View check run for this annotation

Codecov / codecov/patch

checks/social_tags.go#L52-L53

Added lines #L52 - L53 were not covered by tests
resp, err := s.client.Do(req)
if err != nil {
return nil, err
}

Check warning on line 57 in checks/social_tags.go

View check run for this annotation

Codecov / codecov/patch

checks/social_tags.go#L56-L57

Added lines #L56 - L57 were not covered by tests
defer resp.Body.Close()

// Parse HTML document
doc, err := goquery.NewDocumentFromReader(resp.Body)
if err != nil {
return nil, err
}

Check warning on line 64 in checks/social_tags.go

View check run for this annotation

Codecov / codecov/patch

checks/social_tags.go#L63-L64

Added lines #L63 - L64 were not covered by tests

// Extract social tags metadata
tags := &SocialTagsData{
Title: doc.Find("head title").Text(),
Description: doc.Find("meta[name='description']").AttrOr("content", ""),
Keywords: doc.Find("meta[name='keywords']").AttrOr("content", ""),
CanonicalUrl: doc.Find("link[rel='canonical']").AttrOr("href", ""),
OgTitle: doc.Find("meta[property='og:title']").AttrOr("content", ""),
OgType: doc.Find("meta[property='og:type']").AttrOr("content", ""),
OgImage: doc.Find("meta[property='og:image']").AttrOr("content", ""),
OgUrl: doc.Find("meta[property='og:url']").AttrOr("content", ""),
OgDescription: doc.Find("meta[property='og:description']").AttrOr("content", ""),
OgSiteName: doc.Find("meta[property='og:site_name']").AttrOr("content", ""),
TwitterCard: doc.Find("meta[name='twitter:card']").AttrOr("content", ""),
TwitterSite: doc.Find("meta[name='twitter:site']").AttrOr("content", ""),
TwitterCreator: doc.Find("meta[name='twitter:creator']").AttrOr("content", ""),
TwitterTitle: doc.Find("meta[name='twitter:title']").AttrOr("content", ""),
TwitterDescription: doc.Find("meta[name='twitter:description']").AttrOr("content", ""),
TwitterImage: doc.Find("meta[name='twitter:image']").AttrOr("content", ""),
ThemeColor: doc.Find("meta[name='theme-color']").AttrOr("content", ""),
Robots: doc.Find("meta[name='robots']").AttrOr("content", ""),
Googlebot: doc.Find("meta[name='googlebot']").AttrOr("content", ""),
Generator: doc.Find("meta[name='generator']").AttrOr("content", ""),
Viewport: doc.Find("meta[name='viewport']").AttrOr("content", ""),
Author: doc.Find("meta[name='author']").AttrOr("content", ""),
Publisher: doc.Find("link[rel='publisher']").AttrOr("href", ""),
Favicon: doc.Find("link[rel='icon']").AttrOr("href", ""),
}
return tags, nil
}
65 changes: 65 additions & 0 deletions checks/social_tags_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
package checks

import (
"context"
"net/http"
"testing"

"github.com/stretchr/testify/assert"
"github.com/xray-web/web-check-api/testutils"
)

// TestSocialTagsEmpty checks that SocialTagsData.Empty is true for the zero
// value and false once a field has been set.
func TestSocialTagsEmpty(t *testing.T) {
	t.Parallel()

	cases := []struct {
		name string
		data SocialTagsData
		want bool
	}{
		{name: "Empty", data: SocialTagsData{}, want: true},
		{name: "Not empty", data: SocialTagsData{Title: "Example Domain"}, want: false},
	}

	for _, tc := range cases {
		tc := tc // capture per iteration; the subtests run in parallel
		t.Run(tc.name, func(t *testing.T) {
			t.Parallel()

			assert.Equal(t, tc.want, tc.data.Empty())
		})
	}
}

// TestNewSocialTags runs GetSocialTags against a mocked HTTP client, once with
// an empty body and once with a small HTML document carrying a title, a meta
// description and an og:title.
//
// NOTE(review): testutils.MockClient and testutils.Response are presumed to
// yield a client that answers every request with the given status and body —
// confirm against the testutils package.
func TestNewSocialTags(t *testing.T) {
	t.Parallel()

	t.Run("No social tags", func(t *testing.T) {
		t.Parallel()

		client := testutils.MockClient(testutils.Response(http.StatusOK, []byte{}))
		tags, err := NewSocialTags(client).GetSocialTags(context.Background(), "http://example.com")
		// assert does not abort the test on failure; stop here so a nil tags
		// value is never dereferenced below.
		if !assert.NoError(t, err) || !assert.NotNil(t, tags) {
			return
		}
		assert.True(t, tags.Empty())
	})

	t.Run("Social tags", func(t *testing.T) {
		t.Parallel()

		html := []byte(`
<html>
<head>
<title>Example Domain</title>
<meta name="description" content="Example description">
<meta property="og:title" content="Example OG Title">
</head>
<body></body>
</html>
`)
		client := testutils.MockClient(testutils.Response(http.StatusOK, html))
		tags, err := NewSocialTags(client).GetSocialTags(context.Background(), "http://example.com")
		// Same guard as above: avoid a nil-pointer panic masking the real failure.
		if !assert.NoError(t, err) || !assert.NotNil(t, tags) {
			return
		}
		assert.False(t, tags.Empty())
		assert.Equal(t, "Example description", tags.Description)
		assert.Equal(t, "Example Domain", tags.Title)
		assert.Equal(t, "Example OG Title", tags.OgTitle)
	})
}
105 changes: 4 additions & 101 deletions handlers/social_tags.go
Original file line number Diff line number Diff line change
@@ -1,120 +1,23 @@
package handlers

import (
"errors"
"net/http"

"github.com/PuerkitoBio/goquery"
"github.com/xray-web/web-check-api/checks"
)

// SocialTags is the JSON payload of page metadata assembled by the social tags
// handler. A field is the empty string when the page did not provide it.
type SocialTags struct {
	// General document metadata.
	Title        string `json:"title"`
	Description  string `json:"description"`
	Keywords     string `json:"keywords"`
	CanonicalUrl string `json:"canonicalUrl"`
	// Open Graph ("og:*") properties.
	OgTitle       string `json:"ogTitle"`
	OgType        string `json:"ogType"`
	OgImage       string `json:"ogImage"`
	OgUrl         string `json:"ogUrl"`
	OgDescription string `json:"ogDescription"`
	OgSiteName    string `json:"ogSiteName"`
	// Twitter card ("twitter:*") values.
	TwitterCard        string `json:"twitterCard"`
	TwitterSite        string `json:"twitterSite"`
	TwitterCreator     string `json:"twitterCreator"`
	TwitterTitle       string `json:"twitterTitle"`
	TwitterDescription string `json:"twitterDescription"`
	TwitterImage       string `json:"twitterImage"`
	// Remaining meta and link values.
	ThemeColor string `json:"themeColor"`
	Robots     string `json:"robots"`
	Googlebot  string `json:"googlebot"`
	Generator  string `json:"generator"`
	Viewport   string `json:"viewport"`
	Author     string `json:"author"`
	Publisher  string `json:"publisher"`
	Favicon    string `json:"favicon"`
}

// isEmpty reports whether every field of tags is the empty string. Since all
// fields are strings, that is the same as tags equalling the zero value.
// tags must not be nil.
func isEmpty(tags *SocialTags) bool {
	var zero SocialTags
	return *tags == zero
}

func HandleGetSocialTags() http.Handler {
func HandleGetSocialTags(s *checks.SocialTags) http.Handler {
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
rawURL, err := extractURL(r)
if err != nil {
JSONError(w, ErrMissingURLParameter, http.StatusBadRequest)
return
}

// Fetch HTML content from the URL
resp, err := http.Get(rawURL.String())
tags, err := s.GetSocialTags(r.Context(), rawURL.String())
if err != nil {
JSONError(w, err, http.StatusInternalServerError)

Check warning on line 18 in handlers/social_tags.go

View check run for this annotation

Codecov / codecov/patch

handlers/social_tags.go#L18

Added line #L18 was not covered by tests
return
}
defer resp.Body.Close()

// Parse HTML document
doc, err := goquery.NewDocumentFromReader(resp.Body)
if err != nil {
return
}

// Extract social tags metadata
tags := &SocialTags{
Title: doc.Find("head title").Text(),
Description: doc.Find("meta[name='description']").AttrOr("content", ""),
Keywords: doc.Find("meta[name='keywords']").AttrOr("content", ""),
CanonicalUrl: doc.Find("link[rel='canonical']").AttrOr("href", ""),
OgTitle: doc.Find("meta[property='og:title']").AttrOr("content", ""),
OgType: doc.Find("meta[property='og:type']").AttrOr("content", ""),
OgImage: doc.Find("meta[property='og:image']").AttrOr("content", ""),
OgUrl: doc.Find("meta[property='og:url']").AttrOr("content", ""),
OgDescription: doc.Find("meta[property='og:description']").AttrOr("content", ""),
OgSiteName: doc.Find("meta[property='og:site_name']").AttrOr("content", ""),
TwitterCard: doc.Find("meta[name='twitter:card']").AttrOr("content", ""),
TwitterSite: doc.Find("meta[name='twitter:site']").AttrOr("content", ""),
TwitterCreator: doc.Find("meta[name='twitter:creator']").AttrOr("content", ""),
TwitterTitle: doc.Find("meta[name='twitter:title']").AttrOr("content", ""),
TwitterDescription: doc.Find("meta[name='twitter:description']").AttrOr("content", ""),
TwitterImage: doc.Find("meta[name='twitter:image']").AttrOr("content", ""),
ThemeColor: doc.Find("meta[name='theme-color']").AttrOr("content", ""),
Robots: doc.Find("meta[name='robots']").AttrOr("content", ""),
Googlebot: doc.Find("meta[name='googlebot']").AttrOr("content", ""),
Generator: doc.Find("meta[name='generator']").AttrOr("content", ""),
Viewport: doc.Find("meta[name='viewport']").AttrOr("content", ""),
Author: doc.Find("meta[name='author']").AttrOr("content", ""),
Publisher: doc.Find("link[rel='publisher']").AttrOr("href", ""),
Favicon: doc.Find("link[rel='icon']").AttrOr("href", ""),
}

if isEmpty(tags) {
JSONError(w, errors.New("no metadata found"), http.StatusBadRequest)
return
}

JSON(w, tags, http.StatusOK)
})
}
Loading
Loading