From c61489ed9c8b665de8ddbc68d57da2872d2fd0a0 Mon Sep 17 00:00:00 2001 From: Ting-Jun Wang Date: Fri, 16 Jul 2021 21:10:58 +0800 Subject: [PATCH] =?UTF-8?q?feat:=20=E4=BF=AE=E6=94=B9=E7=88=AC=E8=9F=B2?= =?UTF-8?q?=E6=96=B9=E5=BC=8F?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- getData.py | 143 +++++++++++++++++++++++++---------------------------- 1 file changed, 67 insertions(+), 76 deletions(-) diff --git a/getData.py b/getData.py index eb2dfbb..6227f34 100644 --- a/getData.py +++ b/getData.py @@ -4,33 +4,35 @@ import os import csv from bs4 import BeautifulSoup as bs - -header = { - 'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:79.0) Gecko/20100101 Firefox/79.0', - 'Cookie': '輸入登入暨大教務系統後所得到的cookie' -} +session = requests.Session() mainURL = "https://ccweb.ncnu.edu.tw/student/" courses = [] generalCourse = [] -def getGeneralCourseData(year): - ''' - 透過年份取得 通識課程分類的csv檔 - 供後續課程對應。 - - 先儲存到 generalCourse list,後續再用 courseID 對應通識分類 - ''' +def login(username, password): + global session + response = session.get('https://ccweb.ncnu.edu.tw/student/login.php') + root = bs(response.text, 'html.parser') + loginToken = root.find('input', {'name': 'token'}).get('value') - # 教務系統有開放 年度的query - # 但實際操作後似乎僅開放當前學年度 - response = requests.get(mainURL+"aspmaker_student_common_rank_courses_viewlist.php?x_studentid=0&z_studentid=LIKE&x_year={}&z_year=%3D&cmd=search&export=csv".format(year), headers=header) - data = response.text + # request login page + response = session.post( + "https://ccweb.ncnu.edu.tw/student/login.php", + data={ + 'token': loginToken, + 'modal': '0', + 'username': username, + 'password': password, + 'type': 'a' + } + ) - courses = data.split('\r\n')[1:-1] - for course in courses: - course = course.split(',') - generalCourse.append(course) + # 成功的話 return http 302, redirect + if len(response.history)!=0: + return True + else: + return False def curlDepartmentCourseTable(year): ''' @@ -39,74 +41,63 @@ def curlDepartmentCourseTable(year): ''' print("取得所有課程資料:") - response = requests.get(mainURL+"aspmaker_course_opened_semester_stat_viewlist.php?x_year={}&recperpage=ALL".format(year), headers=header) - data = response.text - root = bs(data, "html.parser") - - count = 1 - departmentsTR = root.findAll('tr')[1:] # 清除 thead - for tr in departmentsTR: - name = tr.findAll('td')[4].find('span').find('span').string # 取得 科系名稱 - link = mainURL + tr.find('a').get('data-url').replace('amp;', '') # 清除不必要符號, 取得 連結 - print("擷取{}課程... ({}/{})...".format(name, count, len(departmentsTR))) - count += 1 - extractDepartmentCourseTable(name, link) # 透過連結 開始擷取 各科系課程 + # 切換年度,應該是用 cookie 儲存當前閱覽的年份 + url = 'https://ccweb.ncnu.edu.tw/student/aspmaker_course_opened_detail_viewlist.php?cmd=search&t=aspmaker_course_opened_detail_view&z_year=%3D&x_year={}&z_courseid=%3D&x_courseid=&z_cname=LIKE&x_cname=&z_deptid=%3D&x_deptid=&z_division=LIKE&x_division=&z_grade=%3D&x_grade=&z_teachers=LIKE&x_teachers=&z_not_accessible=LIKE&x_not_accessible=' + response = session.get(url.format(year)) -def extractDepartmentCourseTable(departmentName, link): + # 取得 所有課程的 csv + response = session.get('https://ccweb.ncnu.edu.tw/student/aspmaker_course_opened_detail_viewlist.php?export=csv') + with open("allCourses.csv", "wb") as fp: + fp.write(response.content) + +def extractDepartmentCourseTable(year): ''' 透過各科系連結取得課程資訊 若為通識類別還要跟csv檔資料做對應,取得正確通識類別 對應後存取到 output.json ''' - response = requests.get(link, headers=header) - data = response.text - root = bs(data, "html.parser") + with open("allCourses.csv") as fp: + csvData = fp.read() - courseTR = root.findAll('tr')[1:] # 清除 thead - for tr in courseTR: - courseObj = {} - tds = tr.find_all('td') - - courseObj['link'] = mainURL + tds[0].find('a').get('href') - courseObj['year'] = tds[1].find('span').string - courseObj['number'] = tds[2].find('span').string - courseObj['class'] = tds[3].find('span').string - courseObj['name'] = tds[4].find('span').string - courseObj['department'] = tds[5].find('span').string - courseObj['graduated'] = tds[6].find('span').string - courseObj['grade'] = tds[7].find('span').string - courseObj['teacher'] = tds[8].find('span').string - courseObj['place'] = tds[9].find('span').string - courseObj['time'] = tds[11].find('span').string - - if courseObj['department']=="99, 通識" : - flag = False - for row in generalCourse: - if row[2] == '"{}"'.format(courseObj['number']): - courseObj['department'] = row[0].replace('"', '') - generalCourse.remove(row) - flag = True - break - if not flag: - print(" - 找不到對應的通識類別: {} ( {} )".format(courseObj['name'], courseObj['number'])) + ans = [] + courses = csvData.split('"\n')[1:-1] + for course in courses: + course = course.replace('\n', '.') + # print(course) + data = course[1:].split('","') - courses.append(courseObj) - - with open('output.json', 'w') as fp: - json.dump(courses, fp) + courseObj = {} + baseLink = "https://ccweb.ncnu.edu.tw/student/aspmaker_course_opened_detail_viewview.php?showdetail=&year={}&courseid={}&_class={}&modal=2" + courseObj['link'] = baseLink.format(year, data[1].zfill(6), data[2]) + courseObj['year'] = data[0] + courseObj['number'] = data[1] + courseObj['class'] = data[2] + courseObj['name'] = data[3] + courseObj['department'] = data[4] + courseObj['graduated'] = data[6] + courseObj['grade'] = data[7] + courseObj['teacher'] = data[8] + courseObj['place'] = data[9] + courseObj['time'] = data[13] + courseObj['credit'] = data[14] + + ans.append(courseObj) + + with open("歷年課程資料/{}_output.json".format(year), 'w') as fp: + json.dump(ans, fp, ensure_ascii=False) if __name__ == "__main__": year = input("年份: ") - getGeneralCourseData(year) + while True: + username = input("學號: ") + password = input("密碼: ") + if login(username, password): + break + else: + print("登入失敗!") + curlDepartmentCourseTable(year) - - print("\n\n=====================") - print("未列入追蹤的通識課程") - print("=====================\n") - - for notIn in generalCourse: - if "體育:" not in notIn[5]: - print(" - 未列入追蹤的新通識課程: {}".format(notIn)) \ No newline at end of file + extractDepartmentCourseTable("1101")