Skip to content

Commit

Permalink
update G link scraping
Browse files Browse the repository at this point in the history
  • Loading branch information
thanhvg committed Sep 28, 2023
1 parent 5a8d787 commit 10f31a1
Show file tree
Hide file tree
Showing 3 changed files with 147 additions and 25 deletions.
5 changes: 3 additions & 2 deletions howdoyou.el
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
;; Author: Thanh Vuong <[email protected]>
;; URL: https://github.com/thanhvg/howdoyou/
;; Package-Requires: ((emacs "25.1") (promise "1.1") (request "0.3.3") (org "9.2"))
;; Version: 0.3.0
;; Version: 0.4.0

;; This program is free software; you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
Expand Down Expand Up @@ -155,7 +155,8 @@
"Extract links inside r class from DOM."
(let ((my-nodes (dom-by-class dom class)))
(mapcar (lambda (a-node)
(dom-attr (dom-child-by-tag (car (dom-children a-node)) 'a) 'href))
;; (setq thanh a-node)
(dom-attr (dom-child-by-tag (nth 2 (nth 2 a-node)) 'a) 'href))
my-nodes)))

(defun howdoyou--extract-links-from-google (dom)
Expand Down
4 changes: 1 addition & 3 deletions test/curl.sh
Original file line number Diff line number Diff line change
@@ -1,9 +1,7 @@
my_req="https://www.google.com/search?q=quit%20vim%20site%3Astackoverflow.com%20OR%20site%3Astackexchange.com%20OR%20site%3Asuperuser.com%20OR%20site%3Aserverfault.com%20OR%20site%3Aaskubuntu.com&hl=en"


my_agent=("Mozilla/5.0 (Macintosh; Intel Mac OS X 10.7; rv:11.0) Gecko/20100101 Firefox/11.0"
"Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:22.0) Gecko/20100 101 Firefox/22.0"
"Mozilla/5.0 (Windows NT 6.1; rv:11.0) Gecko/20100101 Firefox/11.0")
my_agent=("Mozilla/5.0 (X11; Linux i686; rv:109.0) Gecko/20100101 Firefox/118.0")

# my_agent=("foo" "bar")

Expand Down
Loading

0 comments on commit 10f31a1

Please sign in to comment.