-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathcronAdd.php
executable file
·199 lines (160 loc) · 5.35 KB
/
cronAdd.php
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
<?php
//mbrousseau - CPI - Created July 2014 - Updated July 2014
//Pulls the list of guides from the LibGuide API and enters the data as best as possible into the LibGuide forwarder
//I have no output. Put me in the crontab.
//Our log for debugging. It is built up as HTML fragments and only shown if the
//echo at the bottom of this section is re-enabled.
$log = "Time started: ".time(). ".<br />";
//Bring in our info file. It is not visible from here; this script relies on it
//to set $apiVersion and $rawAPI.
require("info.php");
//Start with no links so an unexpected $apiVersion cannot leave $arrayAPI undefined
$arrayAPI = array();
//A really fragile way of grabbing each V1 link
if ($apiVersion == 1){
    $arrayAPI = explode('<BR>', $rawAPI);
}
//Still fragile way of grabbing API V2 links
elseif ($apiVersion == 2){
    $arrayAPI = explode('<li>', $rawAPI);
}
else{ $log .= "Unsupported API version: ".$apiVersion.".<br />"; }
//For each link lets do some processing
foreach ($arrayAPI as $tag){
    //Parsing the a tag for its components
    //Get the URL. Reset it first so a value from the previous tag can never leak through.
    $url = FALSE;
    if ($apiVersion == 1){
        $url = getHref($tag);
    }
    elseif ($apiVersion == 2){
        $url = getHrefv2($tag);
    }
    //Get the Subject
    $subject = getSubject($tag);
    //Get the subject short code (there is nothing to parse when the URL is already FALSE)
    $code = ($url != FALSE) ? getCode($url) : FALSE;
    //Make sure everything has parsed properly and there is a 4 or 8 length code
    if($url != FALSE && $subject != FALSE && $code != FALSE){
        //Check to make sure the URL actually resolves
        if (check404($url) != FALSE){
            //Add it to the listing of guides. A database failure on one guide is
            //logged below and must not abort the rest of the run.
            try {
                $result = addGuide($url, $subject, $code);
            }
            catch (PDOException $e){
                $result = $e->getMessage();
            }
            //Debugging - Don't print this with cron
            //Strict comparison on purpose: a loose == TRUE would also match any
            //non-empty error string and report a failure as a success.
            if ($result === TRUE){
                //All's well
                $log .= $code." guide added successfully.<br />";
            }
            //Guide was not added successfully.
            else{ $log .= "Unable to Add Guide. CODE = ".$code.", ERROR: ".$result.".<br />"; }
        }
        else{ $log .= "The URL came back with a 404. CODE = ".$code.", URL = ".$url.".<br />"; }
    }
    else{ $log .= "Tag info came back false. CODE = ".$code.", SUBJECT = ".$subject.", URL = ".$url.".<br />";}
}
//If it was a manual refresh let them know it's done.
//Under cron there is no POST data at all, so check the key exists before reading it.
if(isset($_POST['refresh']) && $_POST['refresh'] == "manual"){
    echo "done";
}
//Log output for debugging
$log .= "Time finished: ".time(). ".<br />";
//echo $log;
//Takes a V1 <a> tag and rips out the URL, dropping any query string.
//  $tag    - HTML fragment expected to contain an <a href="..."> element
//  returns - the href value up to (not including) its first '?', or the whole
//            href value when it has no query string; FALSE when no quoted value
//            can be found or the result is too short to be a real URL
function getHref($tag){
    //The href value starts right after the first double quote
    $firstQuote = strpos($tag, '"');
    if ($firstQuote === FALSE){ return FALSE; }
    $start = $firstQuote + 1;
    //The URL ends at the query string or, failing that, at the closing quote.
    //Both are searched for after the opening quote only.
    $firstQM = strpos($tag, '?', $start);
    $closeQuote = strpos($tag, '"', $start);
    if ($firstQM === FALSE && $closeQuote === FALSE){ return FALSE; }
    //A '?' that sits beyond the closing quote belongs to the link text, not the URL
    if ($firstQM === FALSE || ($closeQuote !== FALSE && $closeQuote < $firstQM)){
        $end = $closeQuote;
    }
    else{
        $end = $firstQM;
    }
    //Strip it down to just the link
    $url = substr($tag, $start, $end - $start);
    //Make sure it's more than just http:// in there
    if (strlen($url) > 7){
        return $url;
    }
    //Likely not a real url. Abort.
    return FALSE;
}
//Takes an <a> tag and rips out the URL for V2 api.
//Unlike the V1 variant the whole quoted value is kept, query string included.
//  $tag    - HTML fragment expected to contain an <a href="..."> element
//  returns - the text between the first pair of double quotes, or FALSE when
//            either quote is missing or the text is too short to be a real URL
function getHrefv2($tag){
    //Find the opening quote of the href value
    $firstQuote = strpos($tag, '"');
    if ($firstQuote === FALSE){ return FALSE; }
    //Find the matching closing quote; without it there is no complete value to extract
    $closingQuote = strpos($tag, '"', $firstQuote + 1);
    if ($closingQuote === FALSE){ return FALSE; }
    //Strip it down to just the link using the two quote positions
    $url = substr($tag, $firstQuote + 1, $closingQuote - $firstQuote - 1);
    //Make sure it's more than just http:// in there
    if (strlen($url) > 7){
        return $url;
    }
    //Likely not a real url. Abort.
    return FALSE;
}
//Takes a URL and extracts the subject and/or course number.
//  $url    - guide URL; its path is expected to be the code itself
//  returns - the path with every "/" removed when that leaves exactly 4
//            characters (subject code) or 8 characters (course code);
//            FALSE otherwise, including when the URL cannot be parsed or has no path
function getCode($url){
    //Anything that is not a string cannot be parsed as a URL
    if (!is_string($url)){ return FALSE; }
    //Ask for the path only: parse_url gives NULL when the URL has no path
    //and FALSE when the URL is seriously malformed
    $path = parse_url($url, PHP_URL_PATH);
    if (!is_string($path)){ return FALSE; }
    //Get rid of the slashes (all of them, so "/MATH/" still yields "MATH")
    $code = str_replace("/", "", $path);
    //4 characters is a subject code, 8 characters is a course code
    $length = strlen($code);
    if ($length == 4 || $length == 8){
        return $code;
    }
    //Not one of those. Abort.
    return FALSE;
}
//Take the tag and pull out the subject description.
//  $tag    - HTML fragment expected to contain <a ...>Subject</a>
//  returns - the text between the first ">" and the "</a>" that follows it
//            (may be an empty string), or FALSE when either marker is missing
function getSubject($tag){
    //The subject starts right after the first ">" (the end of the opening tag)
    $firstPointy = strpos($tag, ">");
    if ($firstPointy === FALSE){ return FALSE; }
    //...and runs up to the closing </a> that comes after it
    $endA = strpos($tag, "</a>", $firstPointy + 1);
    if ($endA === FALSE){ return FALSE; }
    //Grab just the subject text
    return substr($tag, $firstPointy + 1, $endA - $firstPointy - 1);
}
//Make sure the url actually resolves.
//  $url    - the URL to request
//  returns - $url when the request completed with any status other than 404;
//            FALSE on a 404 or when no HTTP response could be obtained at all
function check404($url){
    $handle = curl_init($url);
    //curl could not even build a handle for this URL
    if ($handle === FALSE){ return FALSE; }
    //Capture the response instead of printing it
    curl_setopt($handle, CURLOPT_RETURNTRANSFER, TRUE);
    //Bound the request so one stalled host cannot hang the whole cron run
    curl_setopt($handle, CURLOPT_CONNECTTIMEOUT, 10);
    curl_setopt($handle, CURLOPT_TIMEOUT, 30);
    //Get the HTML or whatever is linked in $url
    $response = curl_exec($handle);
    //No response at all (DNS failure, refused connection, timeout...) means the
    //link did not resolve; the status code would be 0 here, not 404
    if ($response === FALSE){ return FALSE; }
    //Check for 404 (file not found)
    $httpCode = curl_getinfo($handle, CURLINFO_HTTP_CODE);
    if ($httpCode == 404){
        return FALSE;
    }
    //Otherwise send the url back
    return $url;
}
//Actually add the libguide, updating the existing record when the insert hits a duplicate key.
//  $url     - guide URL, stored in `URL`
//  $subject - guide name, stored in `Name`
//  $code    - subject/course code, stored in `Code`
//  returns  - TRUE when the row was inserted/updated, otherwise the PDO error code.
//             Callers must compare with === TRUE: an error code is a non-empty
//             string and is therefore loosely equal to TRUE.
function addGuide($url, $subject, $code){
    //One connection is shared by every call in this run rather than reconnecting
    //(and re-reading info.php) for each guide
    static $dbConnection = null;
    try {
        if ($dbConnection === null){
            //Bring in the DB credentials. info.php is not visible from here; it is
            //expected to set $database, $host, $username and $password.
            require("info.php");
            //PDO to the database
            $connection = new PDO('mysql:dbname='.$database.';host='.$host.';charset=utf8', $username, $password);
            $connection->setAttribute(PDO::ATTR_EMULATE_PREPARES, false);
            $connection->setAttribute(PDO::ATTR_ERRMODE, PDO::ERRMODE_EXCEPTION);
            //Only keep the connection once it is fully configured
            $dbConnection = $connection;
        }
        //Update old records if subject is the same or insert a new record
        $statement = 'INSERT INTO `guideInfo` (`Name`, `Code`, `URL`, `addedBy`) VALUES(?,?,?,?) ON DUPLICATE KEY UPDATE `Name` = ?, `addedBy` = ?, `URL` = ?';
        $exec = $dbConnection->prepare($statement);
        //If the record was updated/inserted
        if ($exec->execute(array($subject, $code, $url, "PHP", $subject, "PHP", $url))){
            return TRUE;
        }
        //Didn't work return the PDO error
        return $exec->errorCode();
    }
    //With ERRMODE_EXCEPTION failures arrive as exceptions rather than a FALSE
    //return, so this is where the error code actually gets handed back
    catch (PDOException $e){
        error_log("addGuide failed. CODE = ".$code.", ERROR: ".$e->getMessage());
        return (string) $e->getCode();
    }
}
//A better way of grabbing each link;
//Use the DOM to grab the a's and their href's
//PHP 5.3.6+
/*
$dom = new DOMDocument;
$dom->loadHTML($rawAPI);
$links = $dom->getElementsByTagName('a');
foreach ($links as $link) {
$href = $link->getAttribute('href');
echo $href."<br />";
}*/
?>