-
Notifications
You must be signed in to change notification settings - Fork 11.5k
/
Copy pathgoogle-scholar-citations.rb
85 lines (65 loc) · 2.82 KB
/
google-scholar-citations.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
require "active_support/all"
require 'nokogiri'
require 'open-uri'
# Exposes ActiveSupport's number-formatting helpers as module-level methods,
# so the tag below can call Helpers.number_to_human without including
# ActiveSupport::NumberHelper into the Liquid tag class itself.
module Helpers
extend ActiveSupport::NumberHelper
end
module Jekyll
  # Liquid tag that renders the Google Scholar "Cited by" count of one article
  # in abbreviated form (e.g. "1.2K").
  #
  # Usage: {% google_scholar_citations <scholar_id> <article_id> %}
  #
  # Both arguments are resolved through the Liquid render context (they are
  # passed to context[...]), then used to build the URL of the article's
  # citation page. The count is scraped from that page's description meta
  # tags. Any failure while fetching or parsing yields "N/A" and logs the
  # error to stdout rather than aborting the site build.
  class GoogleScholarCitationsTag < Liquid::Tag
    # Process-wide cache of rendered results keyed by resolved article id, so
    # each article is fetched at most once per build. Failures ("N/A") are
    # cached too, which avoids re-requesting a page that just failed.
    Citations = { }

    # Text rendered when the count cannot be determined.
    UNAVAILABLE = "N/A".freeze

    # Captures the number (digits with optional thousands separators) that
    # follows "Cited by" in a meta description.
    CITED_BY_PATTERN = /Cited by (\d+[,\d]*)/.freeze

    # Meta tags inspected for the "Cited by" text, in priority order.
    META_SELECTORS = [
      'meta[name="description"]',
      'meta[property="og:description"]'
    ].freeze

    # @param tag_name [String] name the tag was invoked with
    # @param params [String] raw tag markup: "<scholar_id> <article_id>"
    # @param tokens [Object] passed through to Liquid::Tag unchanged
    def initialize(tag_name, params, tokens)
      super
      # String#split with no arguments splits on runs of whitespace and drops
      # leading/trailing blanks, so the parts need no further stripping.
      @scholar_id, @article_id = params.split

      puts "Invalid scholar_id provided" if @scholar_id.to_s.empty?
      puts "Invalid article_id provided" if @article_id.to_s.empty?
    end

    # Renders the abbreviated citation count for the configured article.
    #
    # @param context [Liquid::Context] render context used to resolve both ids
    # @return [String] abbreviated count, or "N/A" when the tag is missing an
    #   argument or the count could not be fetched
    def render(context)
      # A missing argument was already reported in initialize; bail out here
      # instead of raising NoMethodError on nil further down.
      return UNAVAILABLE if @scholar_id.nil? || @article_id.nil?

      article_id = context[@article_id]
      scholar_id = context[@scholar_id]

      # If the citation count has already been fetched, return it.
      cached = Citations[article_id]
      return cached if cached

      article_url = "https://scholar.google.com/citations?view_op=view_citation&hl=en&user=#{scholar_id}&citation_for_view=#{scholar_id}:#{article_id}"

      citation_count =
        begin
          humanize(fetch_citation_count(article_url))
        rescue StandardError => e
          # StandardError (not Exception) so that Ctrl-C / exit requests during
          # the sleep or the HTTP request still stop the build.
          puts "Error fetching citation count for #{article_id} in #{article_url}: #{e.class} - #{e.message}"
          UNAVAILABLE
        end

      Citations[article_id] = citation_count
      citation_count.to_s
    end

    private

    # Downloads the article page and returns its raw citation count.
    #
    # @param article_url [String] URL of the article's citation page
    # @return [Integer] citation count (0 when the page reports none)
    def fetch_citation_count(article_url)
      # Sleep for a random amount of time to avoid being blocked.
      sleep(rand(1.5..3.5))

      # The block form closes the underlying IO/Tempfile as soon as Nokogiri
      # has parsed it.
      doc = URI.open(article_url, "User-Agent" => "Ruby/#{RUBY_VERSION}") do |io|
        Nokogiri::HTML(io)
      end

      extract_citation_count(doc)
    end

    # Extracts the "Cited by n" number from the first usable meta tag.
    #
    # @param doc [Nokogiri::HTML::Document] parsed article page
    # @return [Integer] the count, or 0 when no "Cited by" text is present
    def extract_citation_count(doc)
      META_SELECTORS.each do |selector|
        node = doc.css(selector).first
        content = node && node["content"]
        next unless content

        digits = content[CITED_BY_PATTERN, 1]
        # delete removes every thousands separator; sub would only drop the
        # first one and truncate counts of a million or more.
        return digits ? digits.delete(",").to_i : 0
      end

      0
    end

    # Abbreviates a count with K/M/B suffixes at two significant digits.
    #
    # @param count [Integer]
    # @return [String]
    def humanize(count)
      Helpers.number_to_human(
        count,
        format: '%n%u',
        precision: 2,
        units: { thousand: 'K', million: 'M', billion: 'B' }
      )
    end
  end
end
# Register the tag with Liquid so templates can invoke it as
# {% google_scholar_citations ... %}.
Liquid::Template.register_tag('google_scholar_citations', Jekyll::GoogleScholarCitationsTag)