From 11a00fd098b44cd4db543b4dde573ac27d9c0c35 Mon Sep 17 00:00:00 2001 From: powe97 <116031952+powe97@users.noreply.github.com> Date: Wed, 7 Aug 2024 11:55:59 -0400 Subject: [PATCH 1/3] Remove driver.get(current url) --- PhotoScraper/RPI_SIS_PhotoScraper.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/PhotoScraper/RPI_SIS_PhotoScraper.py b/PhotoScraper/RPI_SIS_PhotoScraper.py index 8cdaf0f..c2f608e 100644 --- a/PhotoScraper/RPI_SIS_PhotoScraper.py +++ b/PhotoScraper/RPI_SIS_PhotoScraper.py @@ -446,9 +446,6 @@ def getStudentInfoFromCourseHelper(driver, term, class_list): input() raise - img_url = driver.current_url - driver.get(img_url) - # image, initalize to empty string student_record["img url"] = "" image_arr = driver.find_elements(By.TAG_NAME, "img") From 3252a65c2f36483017e6e9619e9415a502fe4caa Mon Sep 17 00:00:00 2001 From: powe97 <116031952+powe97@users.noreply.github.com> Date: Wed, 7 Aug 2024 15:03:31 -0400 Subject: [PATCH 2/3] [Refactor:Photoscraper] Refactor to dump images into 1 dir --- PhotoScraper/RPI_SIS_PhotoScraper.py | 32 +++++++++------------------- 1 file changed, 10 insertions(+), 22 deletions(-) diff --git a/PhotoScraper/RPI_SIS_PhotoScraper.py b/PhotoScraper/RPI_SIS_PhotoScraper.py index c2f608e..8dc1e39 100644 --- a/PhotoScraper/RPI_SIS_PhotoScraper.py +++ b/PhotoScraper/RPI_SIS_PhotoScraper.py @@ -173,24 +173,12 @@ def selectTerm(driver): ################################################################## # Saves the images with rcs id as image name to a term/course folder -def saveImagesToFolder(term, class_list): +def saveImagesToFolder(class_list): if len(class_list) == 0: return - course_crn = class_list[0]["course_crn"] - course_prefix = class_list[0]["course_prefix"] - course_name = class_list[0]["course_name"] - course_section = class_list[0]["course_section"] - course_number = class_list[0]["course_number"] - - course_folder_name = "{}-{}-{}".format(course_prefix, course_number, course_section) - - # make term (month year) into month-year - term_elements = term.split() - folder_term = term_elements[0] + "-" + term_elements[1] - # get path and create path if not already existed - path = Path(folder_term, course_folder_name) + path = Path("images") path.mkdir(exist_ok=True, parents=True) jsonfile = [] @@ -310,9 +298,7 @@ def getStudentInfoFromCourse(driver, term): if class_list == 0: print("Warning: this class size is 0") - else: - # Use the info collected and save the image with rcs id for term/course in current directory - saveImagesToFolder(term, class_list) + return class_list ################################################################## @@ -572,6 +558,8 @@ def loopOverCourses(driver, term): # click Course Information- Select a CRN driver.find_element(By.LINK_TEXT, "Course Information- Select a CRN").click() + courses = [] + # if there was at least one crn in the file if crns: driver.find_element( @@ -584,7 +572,7 @@ def loopOverCourses(driver, term): crn_box.send_keys(crn) crn_box.send_keys(Keys.TAB) crn_box.send_keys(Keys.RETURN) - getStudentInfoFromCourse(driver, term) + courses.append(getStudentInfoFromCourse(driver, term)) print("Finished processing CRN " + crn) return @@ -614,15 +602,15 @@ def loopOverCourses(driver, term): elif answer == "exit": return elif answer == "y": - print( - "Getting student pictures... (this could take a few seconds per student)" - ) + print("Getting list of students. Will scrape them upon exiting.") select_course.select_by_index(index) driver.find_element(By.XPATH, "//input[@value='Submit']").click() - getStudentInfoFromCourse(driver, term) + courses.append(getStudentInfoFromCourse(driver, term)) break else: print("Invalid answer! Try again!") + class_list = [student for course in courses for student in course] + saveImagesToFolder(class_list) # Assumes SIS main page is open From 5bfbdbd75b6ae57b3f293d871fd10cfdad27549f Mon Sep 17 00:00:00 2001 From: powe97 <116031952+powe97@users.noreply.github.com> Date: Wed, 7 Aug 2024 15:31:52 -0400 Subject: [PATCH 3/3] Remove dupes --- PhotoScraper/RPI_SIS_PhotoScraper.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/PhotoScraper/RPI_SIS_PhotoScraper.py b/PhotoScraper/RPI_SIS_PhotoScraper.py index 8dc1e39..9524b72 100644 --- a/PhotoScraper/RPI_SIS_PhotoScraper.py +++ b/PhotoScraper/RPI_SIS_PhotoScraper.py @@ -609,8 +609,8 @@ def loopOverCourses(driver, term): break else: print("Invalid answer! Try again!") - class_list = [student for course in courses for student in course] - saveImagesToFolder(class_list) + class_list = set([student for course in courses for student in course]) + saveImagesToFolder(list(class_list)) # Assumes SIS main page is open