Skip to content

Commit c6553fa

Browse files
committed
init commit
0 parents  commit c6553fa

File tree

9 files changed

+449
-0
lines changed

9 files changed

+449
-0
lines changed

Cakefile

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
fs = require 'fs'
2+
3+
{print} = require 'sys'
4+
{spawn} = require 'child_process'
5+
6+
# Compile every CoffeeScript file under src/ into lib/ by spawning the
# `coffee` command-line compiler, relaying its output to this process.
#
# callback - optional function, called with no arguments once the compiler
#            exits with status 0. It is not called when compilation fails.
build = (callback) ->
  coffee = spawn 'coffee', ['-c', '-o', 'lib', 'src']

  # Without a listener a failed spawn (e.g. `coffee` not on PATH) surfaces as
  # an unhandled 'error' event.
  coffee.on 'error', (err) ->
    process.stderr.write "Unable to run coffee: #{err.message}\n"

  coffee.stderr.on 'data', (data) ->
    process.stderr.write data.toString()

  # Write straight to process.stdout: `print` from the deprecated `sys` module
  # is not available on current Node releases.
  coffee.stdout.on 'data', (data) ->
    process.stdout.write data.toString()

  coffee.on 'exit', (code) ->
    if code is 0
      callback?()
    else
      process.stderr.write "coffee exited with code #{code}\n"
14+
15+
# `cake build` - one-off compile of src/ into lib/.
task 'build', 'Build lib/ from src/', ->
  build()

# `cake watch` - run `coffee -w` so lib/ is rebuilt whenever src/ changes,
# relaying the compiler's output to this process.
task 'watch', 'Watch src/ for changes', ->
  coffee = spawn 'coffee', ['-w', '-c', '-o', 'lib', 'src']

  # Without a listener a failed spawn (e.g. `coffee` not on PATH) surfaces as
  # an unhandled 'error' event.
  coffee.on 'error', (err) ->
    process.stderr.write "Unable to run coffee: #{err.message}\n"

  coffee.stderr.on 'data', (data) ->
    process.stderr.write data.toString()

  # Write straight to process.stdout: `print` from the deprecated `sys` module
  # is not available on current Node releases.
  coffee.stdout.on 'data', (data) ->
    process.stdout.write data.toString()

TODO.markdown

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
TODO
2+
====
3+
Crawler
4+
- clean up FN() getClassList: text().trim() repetitiveness
5+
6+
Misc
7+
- add some logging

src/config/db_example.coffee

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
mongoose = require 'mongoose'
2+
3+
# Example MongoDB connection settings.
# NOTE(review): presumably meant to be copied to config/db.coffee (the path the
# models require) with the placeholder values below replaced - confirm.
db =
  # Cached result of mongoose.connect; null until init() has run.
  _db: null

  # Placeholders. They are string literals so that this file can be required
  # as-is; bare identifiers here would throw a ReferenceError on load.
  user: 'username'
  pass: 'password'
  host: 'host'
  port: 10087
  name: "csquared"

  # Connect on the first call and reuse that connection afterwards.
  #
  # Returns whatever mongoose.connect returned on the first call.
  init: () ->
    if not @_db
      @_db = mongoose.connect "mongodb://" +
        "#{@user}:#{@pass}@#{@host}:#{@port}/#{@name}"
    @_db

module.exports = db

src/core.coffee

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Crawler = require './crawler/crawler'

src/crawler/crawler.coffee

Lines changed: 295 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,295 @@
1+
request = require 'request'
2+
jsdom = require 'jsdom'
3+
Models = require '../models/models' # Schemas Container
4+
Subjects = Models.subjects
5+
Meta = Models.meta
6+
Courses = Models.courses
7+
baseUrl = 'https://webapp4.asu.edu/catalog'
8+
jQueryUrl = ['https://ajax.googleapis.com/ajax/libs/jquery/1.7.2/jquery.min.js']
9+
# Scrapes pages under `baseUrl` (the ASU class catalog) with request + jsdom
# and stores the results through the Subjects, Courses and Meta models.
class Crawler
  # JSESSIONID value captured by the most recent successful getJSession call.
  jsession: null

  # Matches term labels that denote a summer session.
  summerTermRegex = /(Summer|Sum)/i

  # Private: choose the current term id(s) from the scraped term links.
  #
  # terms - array of {text, href} taken from the first links of #termList.
  # year  - four-digit calendar year used to filter summer terms.
  #
  # When the first link is not a summer term, its trailing digits are the only
  # result. Otherwise every summer link whose two-digit year matches `year` is
  # returned. Returns [] when nothing usable is found.
  selectCurrentTerms = (terms, year) ->
    first = terms[0]
    return [] unless first?.href?

    unless summerTermRegex.test(first.text)
      termId = first.href.match(/\d+?$/)
      return [] unless termId?
      return [termId[0]]

    currentYr = String(year).slice(-2)
    matches = []
    for term in terms when term.href? and summerTermRegex.test(term.text)
      # [A-Za-z], not [A-z]: the latter also matches [ \ ] ^ _ and backtick.
      code = term.href.match(/\d{4}[A-Za-z]?/)
      continue unless code?
      termYr = code[0].match(/\d(\d{2})\d[A-Za-z]?/)
      matches.push code[0] if termYr? and termYr[1] is currentYr
    matches

  # Private: build the update for an already-stored course.
  #
  # openSeats - seat count scraped just now (number).
  # prevOpen  - most recent seat count stored for the course (number).
  #
  # Returns {$push, $set}, or null when either count is NaN or negative.
  seatUpdateFor = (openSeats, prevOpen) ->
    return null if isNaN(openSeats) or isNaN(prevOpen)
    return null if openSeats < 0 or prevOpen < 0

    isOpen = openSeats > 0
    wasOpen = prevOpen > 0
    status = switch
      when isOpen and wasOpen then 'Open'
      when isOpen then 'Just opened'
      when wasOpen then 'Just closed'
      else 'Closed'

    pushed = openSeats: openSeats
    pushed[if isOpen then 'lastOpened' else 'lastClosed'] = new Date()
    {$push: pushed, $set: {status: status}}

  # Private: close the MongoDB connection after a one second delay and log
  # once the close call has actually been made.
  closeConnectionSoon = ->
    setTimeout(->
      Models.close()
      console.log "MongoDB connection closed... @ #{new Date()}"
    , 1000)

  # Request the catalog root with a cookie jar that already carries the
  # onlineCampusSelection=C cookie, read the JSESSIONID cookie the jar ends up
  # with, remember it on @jsession and hand the jar to the callback.
  #
  # cb - function(cookieJar). Not invoked when the request fails; the error is
  #      only logged.
  getJSession: (cb) ->
    cookieJar = request.jar()

    campusCookie = request.cookie 'onlineCampusSelection=C'
    campusCookie.value = 'C'
    campusCookie.path = '/catalog'
    cookieJar.add campusCookie

    options =
      url: "#{baseUrl}/"
      jar: cookieJar
      followRedirect: false

    request options, (error, response, body) =>
      if error?
        console.log "Error: #{error}"
        return

      # The last JSESSIONID cookie in the jar wins; '' when there is none.
      jsessionid = ''
      for jarCookie in cookieJar.cookies when jarCookie.name is 'JSESSIONID'
        jsessionid = jarCookie.value

      sessionCookie = request.cookie 'JSESSIONID=' + jsessionid
      sessionCookie.value = jsessionid
      cookieJar.add sessionCookie
      @jsession = jsessionid

      cb cookieJar

  # Scrape TooltipTerms.ext, work out the current term id(s) and write them to
  # the `currentTerm` field of every Meta document. Nothing is written when no
  # term could be determined.
  updateCurrentTerm: () ->
    request
      url: "#{baseUrl}/TooltipTerms.ext"
    , (error, response, body) ->
      if error?
        console.log "Error: #{error}"
        return

      jsdom.env body, jQueryUrl, (errors, window) ->
        unless window?.jQuery?
          console.log "Error: #{errors}"
          return

        $ = window.jQuery
        termLinks = $('#termList').find('span a')

        # Only the first four links are ever inspected.
        terms = for i in [0..3]
          link = termLinks.eq(i)
          {text: link.text(), href: link.attr('href')}

        currentTerm = selectCurrentTerms terms, new Date().getFullYear()
        if currentTerm.length is 0
          console.log "Error: no current term found"
          return

        MetaModel = Meta.model()
        MetaModel.update {}, {currentTerm: currentTerm}, {multi: true}, (err, numAffected) ->
          if err?
            console.log "Error: #{err}"
          else
            console.log "Row(s) affected: #{numAffected}"

  # Scrape Subjects.html and create a Subjects document for every subject that
  # is not stored yet. The MongoDB connection is closed once every row has
  # been looked up.
  getSubjects: () ->
    @getJSession (cookieJar) ->
      request
        url: "#{baseUrl}/Subjects.html"
        jar: cookieJar
      , (error, response, body) ->
        if error?
          console.log "Error: #{error}"
          return

        jsdom.env body, jQueryUrl, (errors, window) ->
          unless window?.jQuery?
            console.log "Error: #{errors}"
            return

          $ = window.jQuery
          subjectsNodes = $('#subjectDivs').find('.row')
          nRows = subjectsNodes.length

          # Nothing to look up, so no callback would ever close the connection.
          return closeConnectionSoon() if nRows is 0

          subjectsNodes.each () ->
            subject = $(this).find('div.subject').text()
            name = $(this).find('div.subjectTitle').text()

            # read-only model instance
            SubjectsModel = Subjects.model()

            # check if subject already exists
            SubjectsModel.findOne
              subject: subject
            , ['subject']
            , (err, doc) ->
              if err?
                console.log "Error: #{err}"
              else if doc?
                console.log "Skipping... #{doc.subject} @ #{new Date()}"
              else
                console.log "Creating doc for... #{subject} @ #{new Date()}"

                SubjectsInstance = Subjects.model(true)
                SubjectsInstance.subject = subject
                SubjectsInstance.name = name

                SubjectsInstance.save (err, result) ->
                  if err? then console.log "Error: #{err}"

              # finished, close mongo connection
              closeConnectionSoon() if not --nRows

  # Scrape the class list of one subject for one term. Courses already stored
  # get their seat history and status updated; unknown courses are created.
  # The MongoDB connection is closed once every row has been looked up.
  #
  # subject - subject code placed in the `s` query parameter.
  # termID  - term id placed in the `t` query parameter and in the course id.
  getCourseList: (subject, termID) ->
    @getJSession (cookieJar) ->
      request
        url: "#{baseUrl}/classlist?s=#{encodeURIComponent subject}" +
          "&t=#{encodeURIComponent termID}&e=all"
        jar: cookieJar
      , (error, response, body) ->
        if error?
          console.log "Error: #{error}"
          return

        jsdom.env body, jQueryUrl, (errors, window) ->
          unless window?.jQuery?
            console.log "Error: #{errors}"
            return

          $ = window.jQuery
          courseNodes = $('#CatalogList > tbody > tr')
          nCourses = courseNodes.length

          # Nothing to look up, so no callback would ever close the connection.
          return closeConnectionSoon() if nCourses is 0

          courseNodes.each () ->
            courseNode = $(this)

            # Trimmed text of the first match of `selector` inside this row.
            cellText = (selector) -> courseNode.find(selector).text().trim()

            # initial parsed values
            classNbr = cellText('.classNbrColumnValue a')
            number = cellText('.subjectNumberColumnValue').split(/\s/)[1]
            title = cellText('.titleColumnValue a')
            units = cellText('.hoursColumnValue')
            dateRange = cellText('.startDateColumnValue a').split(/\s-\s/g)
            startDate = dateRange[0]
            # Stays undefined when the cell holds no " - " separated end date.
            endDate = dateRange[1]?.replace(/\(C\)/g, '')
            days = cellText('.dayListColumnValue')
            startTime = cellText('.startTimeDateColumnValue')
            endTime = cellText('.endTimeDateColumnValue')
            gstudy = cellText('.tooltipRqDesDescrColumnValue .gstip')
            seatsCell = courseNode.find('.availableSeatsColumnValue')
            openSeats = seatsCell.find('table> tr > td:eq(0)').text().trim()
            maxSeats = seatsCell.find('table> tr > td:eq(2)').text().trim()

            # Prefer the second '|'-separated part of the link's title
            # attribute; fall back to the visible text.
            instructors = []
            courseNode.find('.instructorListColumnValue > span > span').each () ->
              tooltip = $(this).find('span > span > a').attr('title')
              instructor = if tooltip? then tooltip.split('|')[1] else $(this).text().trim()
              instructors.push instructor

            honors = /Honor/i.test(title)

            # CourseID given in Y{termID}Y{classId} format to help ensure
            # it is unique.
            courseId = "Y#{termID}Y#{classNbr}"

            # read-only
            CoursesModel = Courses.model()

            CoursesModel.findOne
              courseId: courseId
            , ['courseId', 'openSeats']
            , (err, course) ->
              if err?
                console.log "Error: #{err}"
              else if course?
                history = course.openSeats ? []
                prevOpen = parseInt history[history.length - 1], 10
                update = seatUpdateFor parseInt(openSeats, 10), prevOpen

                if update?
                  CoursesModel.update {courseId: courseId}, update, {}, (err, nAffected) ->
                    if err?
                      console.log "Error updating: #{err}"
                    else
                      console.log "Updated #{courseId} @ #{new Date()}"
                else
                  console.log "Error updating: no usable seat count for #{courseId}"
              else
                console.log "New #{courseId.trim()} @ #{new Date()}"

                CoursesInst = Courses.model(true)
                CoursesInst.courseId = courseId
                CoursesInst.subject = subject
                CoursesInst.number = number
                CoursesInst.title = title
                CoursesInst.units = units
                CoursesInst.startDate = startDate
                CoursesInst.endDate = endDate
                CoursesInst.days = days
                CoursesInst.startTime = startTime
                CoursesInst.endTime = endTime
                # The courses schema names this field `genStudy`.
                CoursesInst.genStudy = gstudy
                CoursesInst.instructor = instructors
                CoursesInst.honors = honors
                CoursesInst.openSeats = [openSeats]
                CoursesInst.maxSeats = maxSeats

                if parseInt(openSeats, 10) == 0
                  CoursesInst.lastClosed = [new Date()]
                  CoursesInst.lastOpened = []
                  CoursesInst.status = 'Closed'
                else
                  CoursesInst.lastClosed = []
                  CoursesInst.lastOpened = [new Date()]
                  CoursesInst.status = 'Open'

                CoursesInst.save (err) ->
                  if err? then console.log "Error: #{err}"

              # finished, close mongo connection
              closeConnectionSoon() if not --nCourses

  # Placeholder: only logs the stored session id and a progress message.
  #
  # batch - currently unused.
  asyncBatch: (batch) ->
    # Double quotes are required for #{...} interpolation.
    console.log "Using #{@jsession}"
    console.log 'working on the batch...'
294+
295+
module.exports = Crawler

src/models/courses.coffee

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
db = require '../config/db'
2+
mongoose = require 'mongoose'
3+
Schema = mongoose.Schema
4+
5+
# connect to db
6+
db.init()
7+
8+
# Lazily built Mongoose schema and model for the 'course' model. Both are
# cached on this exported object after first use.
module.exports =
  # Cached mongoose.Schema instance; null until schema() is first called.
  _schema: null

  # Field definitions handed to mongoose.Schema.
  _schemaDef:
    courseId:   {type: String, index: true, required: true}
    subject:    {type: String, index: true}
    number:     String
    title:      String
    units:      String
    startDate:  Date
    endDate:    Date
    days:       [String]
    startTime:  String
    endTime:    String
    genStudy:   String
    instructor: [String]
    honors:     Boolean
    # last index is the latest update
    openSeats:  [Number]
    maxSeats:   Number
    # DateTime logged when class just closes
    lastClosed: [Date]
    # DateTime logged when class just opens
    lastOpened: [Date]
    # Possible status - Open/Closed, Just Opened/Closed
    status:     String

  # Returns the cached schema, building it from _schemaDef on first use.
  schema: () ->
    registry = module.exports
    registry._schema or= new mongoose.Schema(registry._schemaDef)
    registry._schema

  # Cached model constructor; null until model() is first called.
  _model: null

  # Returns the 'course' model constructor, registering it on first use.
  #
  # newInstance - when any non-null value is passed, a fresh document built
  #               from the model is returned instead of the constructor.
  model: (newInstance) ->
    registry = module.exports
    unless registry._model
      registry._model = mongoose.model 'course', registry.schema()
    return registry._model unless newInstance?
    new registry._model()

0 commit comments

Comments
 (0)