refactor: 更改成使用學校開放的csv檔對應通識分類
This commit is contained in:
parent
01d4c08372
commit
73d872b4a6
@ -35,11 +35,14 @@
|
|||||||
- [x] 把版排好(選課框框改成可下拉(才可以同時看到課表))
|
- [x] 把版排好(選課框框改成可下拉(才可以同時看到課表))
|
||||||
|
|
||||||
# 課程爬蟲使用說明
|
# 課程爬蟲使用說明
|
||||||
|
|
||||||
安裝所需套件
|
安裝所需套件
|
||||||
```
|
```
|
||||||
pip install -r requirements.txt
|
pip install -r requirements.txt
|
||||||
```
|
```
|
||||||
|
|
||||||
|

|
||||||
|
|
||||||
登入教務系統取得個人登入cookie,貼到`getData.py` line 11即可執行。
|
登入教務系統取得個人登入cookie,貼到`getData.py` line 11即可執行。
|
||||||
|
|
||||||
過程中取得所有課程相關html資料,整理過後輸出`output.json`
|
過程中取得所有課程相關html資料,整理過後輸出`output.json`
|
||||||
|
|||||||
129
getData.py
129
getData.py
@ -1,6 +1,7 @@
|
|||||||
import requests
|
import requests
|
||||||
import json
|
import json
|
||||||
import os
|
import os
|
||||||
|
import csv
|
||||||
from bs4 import BeautifulSoup as bs
|
from bs4 import BeautifulSoup as bs
|
||||||
|
|
||||||
|
|
||||||
@ -11,8 +12,31 @@ header = {
|
|||||||
|
|
||||||
mainURL = "https://ccweb.ncnu.edu.tw/student/"
|
mainURL = "https://ccweb.ncnu.edu.tw/student/"
|
||||||
courses = []
|
courses = []
|
||||||
|
generalCourse = []
|
||||||
|
|
||||||
|
def getGeneralCourseData(year):
    """Download the general-education (通識) course classification CSV for *year*.

    Each CSV data row is split on commas and appended to the module-level
    ``generalCourse`` list; later processing matches rows by course number
    to recover the correct general-education category.
    """
    # The registrar system exposes a year query parameter, but in practice
    # it appears to return only the current academic year's data.
    query = (
        "aspmaker_student_common_rank_courses_viewlist.php"
        "?x_studentid=0&z_studentid=LIKE&x_year={}&z_year=%3D"
        "&cmd=search&export=csv".format(year)
    )
    response = requests.get(mainURL + query, headers=header)

    # Drop the CSV header row ([0]) and the trailing empty element after the
    # final CRLF ([-1]).
    # NOTE(review): naive split on ',' — would break if a quoted field ever
    # contains a comma. Downstream matching depends on this exact format
    # (surrounding quotes are kept in each field), so it is preserved as-is.
    rows = response.text.split('\r\n')[1:-1]
    for row in rows:
        generalCourse.append(row.split(','))
|
||||||
|
|
||||||
def curlDepartmentCourseTable(year):
|
def curlDepartmentCourseTable(year):
|
||||||
|
'''
|
||||||
|
先取得各科系的開課表格連結
|
||||||
|
再將連結丟給 extractDepartmentCourseTable() 取得課程資訊
|
||||||
|
'''
|
||||||
print("取得所有課程資料:")
|
print("取得所有課程資料:")
|
||||||
|
|
||||||
response = requests.get(mainURL+"aspmaker_course_opened_semester_stat_viewlist.php?x_year={}&recperpage=ALL".format(year), headers=header)
|
response = requests.get(mainURL+"aspmaker_course_opened_semester_stat_viewlist.php?x_year={}&recperpage=ALL".format(year), headers=header)
|
||||||
@ -28,18 +52,13 @@ def curlDepartmentCourseTable(year):
|
|||||||
count += 1
|
count += 1
|
||||||
extractDepartmentCourseTable(name, link) # 透過連結 開始擷取 各科系課程
|
extractDepartmentCourseTable(name, link) # 透過連結 開始擷取 各科系課程
|
||||||
|
|
||||||
# def curlGeneralCoursePage():
|
|
||||||
# print("取得通識課資料:")
|
|
||||||
# progress = tqdm(total=generalFinalPage)
|
|
||||||
# for page in range(1, generalFinalPage+1):
|
|
||||||
# url = 'https://ccweb.ncnu.edu.tw/student/aspmaker_student_common_rank_courses_viewlist.php?pageno={}'.format(page)
|
|
||||||
# response = requests.get(url, headers=header)
|
|
||||||
# data = response.text
|
|
||||||
# with open('general/{}.html'.format(page), 'w') as fp:
|
|
||||||
# fp.write(data)
|
|
||||||
# progress.update(1)
|
|
||||||
|
|
||||||
def extractDepartmentCourseTable(departmentName, link):
|
def extractDepartmentCourseTable(departmentName, link):
|
||||||
|
'''
|
||||||
|
透過各科系連結取得課程資訊
|
||||||
|
若為通識類別還要跟csv檔資料做對應,取得正確通識類別
|
||||||
|
|
||||||
|
對應後存取到 output.json
|
||||||
|
'''
|
||||||
response = requests.get(link, headers=header)
|
response = requests.get(link, headers=header)
|
||||||
data = response.text
|
data = response.text
|
||||||
root = bs(data, "html.parser")
|
root = bs(data, "html.parser")
|
||||||
@ -60,80 +79,34 @@ def extractDepartmentCourseTable(departmentName, link):
|
|||||||
courseObj['teacher'] = tds[8].find('span').string
|
courseObj['teacher'] = tds[8].find('span').string
|
||||||
courseObj['place'] = tds[9].find('span').string
|
courseObj['place'] = tds[9].find('span').string
|
||||||
courseObj['time'] = tds[11].find('span').string
|
courseObj['time'] = tds[11].find('span').string
|
||||||
|
|
||||||
|
if courseObj['department']=="99, 通識" :
|
||||||
|
flag = False
|
||||||
|
for row in generalCourse:
|
||||||
|
if row[2] == '"{}"'.format(courseObj['number']):
|
||||||
|
courseObj['department'] = row[0].replace('"', '')
|
||||||
|
generalCourse.remove(row)
|
||||||
|
flag = True
|
||||||
|
break
|
||||||
|
if not flag:
|
||||||
|
print(" - 找不到對應的通識類別: {} ( {} )".format(courseObj['name'], courseObj['number']))
|
||||||
|
|
||||||
courses.append(courseObj)
|
courses.append(courseObj)
|
||||||
|
|
||||||
with open('output.json', 'w') as fp:
|
with open('output.json', 'w') as fp:
|
||||||
json.dump(courses, fp)
|
json.dump(courses, fp)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Ask which academic year to scrape.
    year = input("年份: ")

    # Load the general-education classification CSV first, then crawl every
    # department's course table (which consumes ``generalCourse`` while
    # mapping 通識 courses to their category).
    getGeneralCourseData(year)
    curlDepartmentCourseTable(year)

    # Whatever is left in ``generalCourse`` was never matched to a crawled
    # course — report it so new, untracked general-education courses surface.
    print("\n\n=====================")
    print("未列入追蹤的通識課程")
    print("=====================\n")

    for leftover in generalCourse:
        # Physical-education rows (體育:) are expected leftovers; skip them.
        if "體育:" not in leftover[5]:
            print(" - 未列入追蹤的新通識課程: {}".format(leftover))
||||||
Loading…
Reference in New Issue
Block a user