diff --git a/.DS_Store b/.DS_Store new file mode 100644 index 0000000..829fe74 Binary files /dev/null and b/.DS_Store differ diff --git a/.idea/QueryPpsucClassRoomSpider.iml b/.idea/QueryPpsucClassRoomSpider.iml new file mode 100644 index 0000000..1be6e63 --- /dev/null +++ b/.idea/QueryPpsucClassRoomSpider.iml @@ -0,0 +1,12 @@ + + + + + + + + + + \ No newline at end of file diff --git a/.idea/inspectionProfiles/profiles_settings.xml b/.idea/inspectionProfiles/profiles_settings.xml new file mode 100644 index 0000000..dd4c951 --- /dev/null +++ b/.idea/inspectionProfiles/profiles_settings.xml @@ -0,0 +1,7 @@ + + + + \ No newline at end of file diff --git a/.idea/misc.xml b/.idea/misc.xml new file mode 100644 index 0000000..bae973d --- /dev/null +++ b/.idea/misc.xml @@ -0,0 +1,4 @@ + + + + \ No newline at end of file diff --git a/.idea/modules.xml b/.idea/modules.xml new file mode 100644 index 0000000..b1e66bd --- /dev/null +++ b/.idea/modules.xml @@ -0,0 +1,8 @@ + + + + + + + + \ No newline at end of file diff --git a/.idea/vcs.xml b/.idea/vcs.xml new file mode 100644 index 0000000..94a25f7 --- /dev/null +++ b/.idea/vcs.xml @@ -0,0 +1,6 @@ + + + + + + \ No newline at end of file diff --git a/.idea/workspace.xml b/.idea/workspace.xml new file mode 100644 index 0000000..3a7a50c --- /dev/null +++ b/.idea/workspace.xml @@ -0,0 +1,185 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + { + "keyToString": { + "RunOnceActivity.OpenProjectViewOnStart": "true", + "RunOnceActivity.ShowReadmeOnStart": "true", + "WebServerToolWindowFactoryState": "false", + "last_opened_file_path": "/Users/andrewlee/Desktop/Projects/QueryPpsucClassRoomSpider", + "nodejs_package_manager_path": "npm", + "settings.editor.selected.configurable": "Errors" + } +} + + + + + + + + + + + + + + + + + + + + + + + 1664880195137 + + + + + + + + + + + + + file://$PROJECT_DIR$/networkAppClass/process_text.py + 36 + + + file://$PROJECT_DIR$/networkAppClass/process_text.py + 43 + + + + + + + + + + + + + + + 
+ + \ No newline at end of file diff --git a/README.md b/README.md index 03abf8c..e3494d1 100644 --- a/README.md +++ b/README.md @@ -1,13 +1,9 @@ - - # 四位一体课表_数据源 -针对四位一体课表,进行数据爬取与整理,导出数据文件。 +针对四位一体课表,进行数据爬取与整理,导出数据文件。 是空闲教室查询系统的数据源支撑。 - - -

四位一体课表爬虫

+

四位一体课表爬虫

支持公网环境下部署,异步并发爬取数据并整理导出数据文件
@@ -23,44 +19,46 @@

+具体逻辑都写在注释里。 - 具体逻辑都写在注释里。 - ## 目录 -- [上手指南](#上手指南) - - [开发前的配置要求](#开发前的配置要求) - - [安装步骤](#安装步骤) -- [文件目录说明](#文件目录说明) -- [版本控制](#版本控制) -- [作者](#作者) +- [四位一体课表_数据源](#四位一体课表_数据源) + - [目录](#目录) + - [上手指南](#上手指南) + - [开发前的配置要求](#开发前的配置要求) + - [**安装步骤**](#安装步骤) + - [文件目录说明](#文件目录说明) + - [版本控制](#版本控制) + - [原作者](#原作者) + - [使用docker部署服务](#使用docker部署服务) + - [接口格式](#接口格式) + - [TODO](#todo) ### 上手指南 - - ###### 开发前的配置要求 1. Python3.9+ -2. 需要的包依赖: -interval==1.0.0 -requests==2.26.0 -lxml==4.6.3 -selenium==3.141.0 -(此处selenium使用的是Chrome) +2. 需要的包依赖: + interval==1.0.0 + requests==2.26.0 + lxml==4.6.3 + selenium==3.141.0 + (此处selenium使用的是Chrome) ###### **安装步骤** 1. 安装上述所需包依赖 2. Clone the repo 3. 配置根目录下user.json中的username和password为自己的vpn登录账号密码。 + ```sh git clone https://github.com/cicidoll/QueryPpsucClassRoomSpider.git ``` ### 文件目录说明 - ``` filetree ├── /data/ @@ -93,8 +91,59 @@ filetree 该项目使用Git进行版本管理。您可以在repository参看当前可用版本。 -### 作者 +### 原作者 17M053 联系方式:ayaseemt@qq.com + + +### 使用docker部署服务 + +新的特性:使用sanic封装了接口,并且使用了docker进行部署,部署步骤如下: + +在根目录下依次输入如下两条命令,耐心等待: + +```plaintext +docker build -t classroomspider . 
+docker run --name ppsucClassRoomSpider -p 8000:8000 -d classroomspider +``` +(这里docker容器的名字随便取就行,镜像的名字有不能大写的要求,满足这个要求后也可以随便取) +然后就可以访问8000端口获取服务 + + +### 接口格式 + + - 获取所有数据: + +``http://localhost:8000/`` + + - 获取课程数据: + +``http://localhost:8000/detail/Class?bd=建筑名&t=时间段&dt=星期`` + + - 获取换课数据: + +``http://localhost:8000/detail/mobilize?bd=建筑名&rm=教室`` + + - 获取借教室数据: + +``http://localhost:8000/detail/borrow?bd=建筑名&rm=教室`` + +其中:建筑名包含:``tj zj zl xp``,时间段包含:``am12 am34 pm12 pm34``,星期包含``1 2 3 4 5`` + +对于不同的楼教室名称不同,一般就使用数字表示即可。特别的,团结的教室为:``tj1``(团阶一)、``tj2``(团阶二)...``tj8``(团阶八)、``tj9``(团报告厅) + + - Example + +| url | 含义 | +|------------------------------------------------------------|-----------------| +| http://localhost:8000/ | 获取所有数据 | +| http://localhost:8000/detail/Class?bd=tj&t=am34&dt=3 | 获取tj星期3上午34节的课程数据 | +| http://localhost:8000/detail/mobilize?bd=tj&rm=tj9 | 获取tj楼团报告厅教室的换课数据 | +| http://localhost:8000/detail/borrow?bd=zj&rm=202 | 获取zj楼202教室的借教室数据 | + +### TODO + + - 更新数据的方式确实很奇怪 + - 借教室的数据没有整合 \ No newline at end of file diff --git a/config/classRoomNumConfig.json b/config/classRoomNumConfig.json index 05e024c..f4875ff 100644 --- a/config/classRoomNumConfig.json +++ b/config/classRoomNumConfig.json @@ -22,6 +22,11 @@ "302","303","304","305","306","309", "402","403","404","405","406","409", "502","503","504","505","506","509" + ], + "TuanJie": [ + "团阶一", "团阶二", "团阶三", "团阶四", + "团阶五", "团阶六", "团阶七", "团阶八", + "团报告厅" ] }, "pathPool": [ diff --git a/config/dataTemplate.json b/config/dataTemplate.json index e30608d..c191c30 100644 --- a/config/dataTemplate.json +++ b/config/dataTemplate.json @@ -30,6 +30,21 @@ } }, "XiPei": + { + "am12": { + "1": [], "2": [], "3": [], "4": [], "5": [] + }, + "am34": { + "1": [], "2": [], "3": [], "4": [], "5": [] + }, + "pm12": { + "1": [], "2": [], "3": [], "4": [], "5": [] + }, + "pm34": { + "1": [], "2": [], "3": [], "4": [], "5": [] + } + }, + "TuanJie": { "am12": { "1": [], "2": [], "3": [], "4": [], "5": [] diff --git 
a/config/mobilizeBorrowTemplate.json b/config/mobilizeBorrowTemplate.json index 2d0284a..f1b0979 100644 --- a/config/mobilizeBorrowTemplate.json +++ b/config/mobilizeBorrowTemplate.json @@ -23,6 +23,11 @@ "302": [],"303": [],"304": [],"305": [],"306": [],"309": [], "402": [],"403": [],"404": [],"405": [],"406": [],"409": [], "502": [],"503": [],"504": [],"505": [],"506": [],"509": [] + }, + "TuanJie": + { "团阶一": [], "团阶二": [], "团阶三": [], "团阶四": [], + "团阶五": [], "团阶六": [], "团阶七": [], "团阶八": [], + "团报告厅": [] } }, "borrow": @@ -49,6 +54,11 @@ "302": [],"303": [],"304": [],"305": [],"306": [],"309": [], "402": [],"403": [],"404": [],"405": [],"406": [],"409": [], "502": [],"503": [],"504": [],"505": [],"506": [],"509": [] + }, + "TuanJie": + { "团阶一": [], "团阶二": [], "团阶三": [], "团阶四": [], + "团阶五": [], "团阶六": [], "团阶七": [], "团阶八": [], + "团报告厅": [] } } } \ No newline at end of file diff --git a/data/classRoomData.json b/data/classRoomData.json index 8b13789..fe77089 100644 --- a/data/classRoomData.json +++ b/data/classRoomData.json @@ -1 +1,1831 @@ - +{ + "zhuJian": { + "am12": { + "1": [ + "110", + "106", + "206", + "207", + "309", + "401", + "406", + "407", + "410", + "411", + "501", + "503", + "504", + "508", + "510", + "511" + ], + "2": [ + "104", + "101", + "110", + "207", + "403", + "411", + "501", + "502", + "504", + "506", + "508", + "510", + "511" + ], + "3": [ + "110", + "301", + "305", + "402", + "406", + "502", + "508" + ], + "4": [ + "202", + "201", + "112", + "110", + "111", + "303", + "306", + "309", + "401", + "402", + "404", + "502", + "503", + "507", + "508", + "510", + "511" + ], + "5": [ + "112", + "110", + "108", + "305", + "306", + "309", + "402", + "404", + "406", + "507", + "510", + "511" + ] + }, + "am34": { + "1": [ + "110", + "309", + "401", + "406", + "407", + "408", + "410", + "411", + "501", + "503", + "504", + "505", + "508", + "510", + "511" + ], + "2": [ + "110", + "111", + "208", + "403", + "406", + "407", + "408", + "502", + "504", + 
"508" + ], + "3": [ + "110", + "111", + "302", + "304", + "401", + "402", + "403", + "405", + "406", + "502", + "506", + "507", + "508" + ], + "4": [ + "201", + "112", + "110", + "102", + "105", + "111", + "208", + "305", + "306", + "401", + "402", + "404", + "405", + "508", + "511" + ], + "5": [ + "104", + "202", + "101", + "112", + "110", + "106", + "205", + "305", + "306", + "309", + "401", + "505", + "506", + "508" + ] + }, + "pm12": { + "1": [ + "104", + "110", + "108", + "302", + "305", + "308", + "307", + "309", + "401", + "402", + "403", + "405", + "406", + "407", + "408", + "409", + "411", + "501", + "503", + "507", + "508", + "509", + "510", + "511" + ], + "2": [ + "101", + "110", + "102", + "111", + "205", + "210", + "309", + "401", + "402", + "403", + "407", + "408", + "409", + "410", + "411", + "501", + "502", + "503", + "505", + "506", + "507", + "510" + ], + "3": [ + "104", + "202", + "101", + "201", + "112", + "110", + "108", + "102", + "204", + "105", + "106", + "206", + "111", + "208", + "205", + "210", + "207", + "301", + "302", + "303", + "304", + "305", + "306", + "308", + "307", + "309", + "401", + "402", + "403", + "404", + "405", + "406", + "407", + "408", + "409", + "410", + "411", + "501", + "502", + "503", + "504", + "505", + "506", + "507", + "508", + "509", + "510", + "511" + ], + "4": [ + "104", + "110", + "204", + "111", + "303", + "304", + "305", + "306", + "308", + "307", + "309", + "401", + "404", + "405", + "411", + "507", + "508", + "511" + ], + "5": [ + "101", + "112", + "110", + "102", + "204", + "111", + "302", + "303", + "305", + "306", + "308", + "309", + "401", + "403", + "405", + "407", + "408", + "409", + "411", + "501", + "503", + "504", + "506", + "507", + "508", + "510", + "511" + ] + }, + "pm34": { + "1": [ + "104", + "110", + "108", + "210", + "302", + "304", + "305", + "308", + "307", + "309", + "401", + "402", + "403", + "405", + "406", + "407", + "408", + "409", + "410", + "411", + "501", + "502", + "503", + 
"505", + "506", + "507", + "508", + "509", + "510", + "511" + ], + "2": [ + "202", + "101", + "112", + "110", + "102", + "105", + "111", + "208", + "205", + "210", + "309", + "401", + "402", + "403", + "407", + "408", + "409", + "410", + "411", + "501", + "502", + "503", + "505", + "506", + "507", + "508", + "509", + "510", + "511" + ], + "3": [ + "104", + "202", + "101", + "201", + "112", + "110", + "108", + "102", + "204", + "105", + "106", + "206", + "111", + "208", + "205", + "210", + "207", + "301", + "302", + "303", + "304", + "305", + "306", + "308", + "307", + "309", + "401", + "402", + "403", + "404", + "405", + "406", + "407", + "408", + "409", + "410", + "411", + "501", + "502", + "503", + "504", + "505", + "506", + "507", + "508", + "509", + "510", + "511" + ], + "4": [ + "104", + "110", + "204", + "111", + "208", + "210", + "301", + "303", + "304", + "305", + "306", + "308", + "307", + "309", + "401", + "404", + "405", + "409", + "410", + "411", + "501", + "502", + "503", + "504", + "505", + "507", + "508", + "509", + "511" + ], + "5": [ + "104", + "101", + "112", + "110", + "102", + "204", + "106", + "111", + "302", + "303", + "305", + "306", + "308", + "309", + "401", + "403", + "404", + "405", + "407", + "408", + "409", + "410", + "411", + "501", + "502", + "503", + "504", + "505", + "506", + "507", + "508", + "509", + "510", + "511" + ] + } + }, + "zhongLou": { + "am12": { + "1": [ + "103", + "107", + "110", + "113", + "205", + "204", + "206", + "207", + "210", + "211", + "303", + "304", + "305", + "306", + "307", + "308", + "407", + "503", + "504", + "506", + "505", + "507", + "510", + "603", + "607", + "703", + "704", + "705", + "707", + "708" + ], + "2": [ + "103", + "107", + "110", + "113", + "203", + "205", + "204", + "206", + "207", + "210", + "211", + "303", + "305", + "306", + "307", + "308", + "407", + "503", + "504", + "506", + "505", + "507", + "510", + "603", + "607", + "703", + "704", + "705", + "707", + "708" + ], + "3": [ + "103", + 
"104", + "107", + "110", + "113", + "203", + "206", + "207", + "305", + "307", + "308", + "407", + "408", + "503", + "504", + "506", + "505", + "507", + "510", + "603", + "607", + "703", + "704", + "705", + "707", + "708" + ], + "4": [ + "103", + "104", + "107", + "110", + "112", + "113", + "205", + "204", + "206", + "207", + "208", + "211", + "303", + "304", + "305", + "306", + "307", + "308", + "407", + "503", + "504", + "506", + "505", + "507", + "510", + "603", + "607", + "703", + "704", + "705", + "707", + "708" + ], + "5": [ + "103", + "104", + "113", + "203", + "205", + "204", + "206", + "207", + "208", + "210", + "211", + "304", + "305", + "306", + "307", + "308", + "407", + "408", + "503", + "504", + "506", + "505", + "507", + "510", + "603", + "607", + "703", + "704", + "705", + "707", + "708" + ] + }, + "am34": { + "1": [ + "104", + "110", + "112", + "113", + "205", + "204", + "206", + "207", + "210", + "211", + "303", + "304", + "305", + "306", + "307", + "308", + "407", + "408", + "503", + "504", + "506", + "505", + "507", + "510", + "603", + "607", + "703", + "704", + "705", + "707", + "708" + ], + "2": [ + "103", + "104", + "107", + "110", + "113", + "203", + "205", + "204", + "206", + "208", + "210", + "211", + "303", + "304", + "305", + "306", + "307", + "308", + "407", + "503", + "504", + "506", + "505", + "507", + "510", + "603", + "607", + "703", + "704", + "705", + "707", + "708" + ], + "3": [ + "103", + "104", + "107", + "110", + "112", + "113", + "203", + "204", + "206", + "211", + "303", + "305", + "306", + "307", + "407", + "408", + "503", + "504", + "506", + "505", + "507", + "510", + "603", + "607", + "703", + "704", + "705", + "707", + "708" + ], + "4": [ + "103", + "104", + "107", + "110", + "113", + "205", + "204", + "206", + "208", + "210", + "211", + "303", + "304", + "305", + "306", + "307", + "308", + "503", + "504", + "506", + "505", + "507", + "510", + "603", + "607", + "703", + "704", + "705", + "707", + "708" + ], + "5": [ + 
"103", + "104", + "205", + "204", + "206", + "207", + "208", + "210", + "211", + "303", + "304", + "305", + "306", + "307", + "308", + "408", + "503", + "504", + "506", + "505", + "507", + "510", + "603", + "607", + "703", + "704", + "705", + "707", + "708" + ] + }, + "pm12": { + "1": [ + "103", + "104", + "107", + "110", + "112", + "113", + "205", + "204", + "206", + "207", + "210", + "211", + "303", + "304", + "305", + "306", + "308", + "408", + "503", + "504", + "506", + "505", + "507", + "510", + "603", + "607", + "703", + "704", + "705", + "707", + "708" + ], + "2": [ + "103", + "104", + "107", + "110", + "112", + "113", + "205", + "204", + "206", + "207", + "208", + "210", + "211", + "304", + "305", + "306", + "307", + "308", + "407", + "408", + "503", + "504", + "506", + "505", + "507", + "510", + "603", + "607", + "703", + "704", + "705", + "707", + "708" + ], + "3": [ + "103", + "104", + "107", + "110", + "112", + "113", + "203", + "205", + "204", + "206", + "207", + "208", + "210", + "211", + "303", + "304", + "305", + "306", + "307", + "308", + "407", + "408", + "503", + "504", + "506", + "505", + "507", + "510", + "603", + "607", + "703", + "704", + "705", + "707", + "708" + ], + "4": [ + "103", + "104", + "107", + "110", + "205", + "204", + "206", + "207", + "208", + "211", + "303", + "305", + "307", + "308", + "408", + "503", + "504", + "506", + "505", + "507", + "510", + "603", + "607", + "703", + "704", + "705", + "707", + "708" + ], + "5": [ + "103", + "104", + "107", + "110", + "112", + "203", + "205", + "204", + "206", + "207", + "208", + "210", + "211", + "303", + "304", + "305", + "306", + "307", + "308", + "408", + "503", + "504", + "506", + "505", + "507", + "510", + "603", + "607", + "703", + "704", + "705", + "707", + "708" + ] + }, + "pm34": { + "1": [ + "103", + "104", + "107", + "110", + "112", + "113", + "203", + "205", + "204", + "206", + "207", + "210", + "211", + "303", + "304", + "305", + "306", + "308", + "408", + "503", + "504", + 
"506", + "505", + "507", + "510", + "603", + "607", + "703", + "704", + "705", + "707", + "708" + ], + "2": [ + "103", + "104", + "107", + "110", + "112", + "113", + "203", + "205", + "204", + "206", + "207", + "208", + "210", + "211", + "304", + "305", + "306", + "307", + "308", + "407", + "408", + "503", + "504", + "506", + "505", + "507", + "510", + "603", + "607", + "703", + "704", + "705", + "707", + "708" + ], + "3": [ + "103", + "104", + "107", + "110", + "112", + "113", + "203", + "205", + "204", + "206", + "207", + "208", + "210", + "211", + "303", + "304", + "305", + "306", + "307", + "308", + "407", + "408", + "503", + "504", + "506", + "505", + "507", + "510", + "603", + "607", + "703", + "704", + "705", + "707", + "708" + ], + "4": [ + "103", + "104", + "107", + "110", + "203", + "205", + "204", + "206", + "207", + "208", + "211", + "303", + "305", + "307", + "308", + "408", + "503", + "504", + "506", + "505", + "507", + "510", + "603", + "607", + "703", + "704", + "705", + "707", + "708" + ], + "5": [ + "103", + "104", + "107", + "110", + "112", + "203", + "205", + "204", + "206", + "207", + "208", + "210", + "211", + "303", + "304", + "305", + "306", + "307", + "308", + "407", + "408", + "503", + "504", + "506", + "505", + "507", + "510", + "603", + "607", + "703", + "704", + "705", + "707", + "708" + ] + } + }, + "XiPei": { + "am12": { + "1": [ + "102", + "105", + "109", + "202", + "203", + "204", + "205", + "206", + "209", + "303", + "304", + "305", + "306", + "309", + "404", + "405", + "406", + "409", + "502", + "506", + "509" + ], + "2": [ + "105", + "106", + "109", + "202", + "204", + "206", + "209", + "303", + "304", + "305", + "306", + "402", + "403", + "404", + "405", + "409", + "503", + "505", + "506", + "509" + ], + "3": [ + "102", + "109", + "202", + "206", + "209", + "302", + "303", + "304", + "305", + "402", + "403", + "406", + "409", + "502", + "506", + "509" + ], + "4": [ + "102", + "103", + "104", + "105", + "106", + "202", + "203", + 
"205", + "303", + "304", + "309", + "402", + "403", + "404", + "405", + "406", + "409", + "504", + "506", + "509" + ], + "5": [ + "104", + "105", + "109", + "202", + "203", + "204", + "205", + "206", + "209", + "303", + "304", + "305", + "306", + "403", + "404", + "405", + "406", + "502", + "504", + "505", + "506", + "509" + ] + }, + "am34": { + "1": [ + "102", + "105", + "109", + "202", + "203", + "204", + "205", + "206", + "209", + "302", + "303", + "304", + "305", + "306", + "309", + "403", + "405", + "406", + "409", + "502", + "506", + "509" + ], + "2": [ + "102", + "104", + "105", + "106", + "109", + "204", + "206", + "209", + "302", + "303", + "304", + "305", + "306", + "402", + "403", + "404", + "405", + "406", + "409", + "505", + "509" + ], + "3": [ + "102", + "103", + "106", + "109", + "202", + "205", + "206", + "209", + "302", + "303", + "304", + "305", + "306", + "402", + "403", + "406", + "502", + "506", + "509" + ], + "4": [ + "102", + "104", + "105", + "106", + "202", + "205", + "303", + "304", + "305", + "306", + "309", + "402", + "403", + "404", + "405", + "406", + "409", + "505", + "506", + "509" + ], + "5": [ + "104", + "109", + "202", + "203", + "204", + "205", + "206", + "209", + "302", + "303", + "304", + "305", + "306", + "402", + "403", + "404", + "405", + "406", + "409", + "502", + "505", + "506", + "509" + ] + }, + "pm12": { + "1": [ + "103", + "105", + "109", + "202", + "203", + "204", + "205", + "206", + "209", + "302", + "303", + "304", + "305", + "306", + "309", + "402", + "403", + "404", + "405", + "406", + "409", + "502", + "503", + "504", + "505", + "506", + "509" + ], + "2": [ + "103", + "104", + "105", + "109", + "202", + "203", + "204", + "206", + "209", + "302", + "303", + "304", + "305", + "306", + "309", + "402", + "403", + "404", + "405", + "406", + "409", + "502", + "503", + "504", + "505", + "506", + "509" + ], + "3": [ + "102", + "103", + "104", + "105", + "106", + "109", + "202", + "203", + "204", + "205", + "206", + 
"209", + "302", + "303", + "304", + "305", + "306", + "309", + "402", + "403", + "404", + "405", + "406", + "409", + "502", + "503", + "504", + "505", + "506", + "509" + ], + "4": [ + "103", + "104", + "109", + "202", + "203", + "205", + "206", + "209", + "302", + "303", + "304", + "305", + "306", + "309", + "402", + "403", + "404", + "405", + "406", + "409", + "502", + "503", + "504", + "505", + "506", + "509" + ], + "5": [ + "103", + "104", + "109", + "203", + "204", + "205", + "206", + "209", + "302", + "303", + "304", + "305", + "306", + "309", + "402", + "403", + "404", + "405", + "406", + "409", + "502", + "503", + "504", + "505", + "506", + "509" + ] + }, + "pm34": { + "1": [ + "103", + "105", + "109", + "202", + "203", + "204", + "205", + "206", + "209", + "302", + "303", + "304", + "305", + "306", + "309", + "402", + "403", + "404", + "405", + "406", + "409", + "502", + "503", + "504", + "505", + "506", + "509" + ], + "2": [ + "103", + "104", + "105", + "106", + "109", + "202", + "203", + "204", + "206", + "209", + "302", + "303", + "304", + "305", + "306", + "309", + "402", + "403", + "404", + "405", + "406", + "409", + "502", + "503", + "504", + "505", + "506", + "509" + ], + "3": [ + "102", + "103", + "104", + "105", + "106", + "109", + "202", + "203", + "204", + "205", + "206", + "209", + "302", + "303", + "304", + "305", + "306", + "309", + "402", + "403", + "404", + "405", + "406", + "409", + "502", + "503", + "504", + "505", + "506", + "509" + ], + "4": [ + "103", + "104", + "109", + "202", + "203", + "205", + "206", + "209", + "302", + "303", + "304", + "305", + "306", + "309", + "402", + "403", + "404", + "405", + "406", + "409", + "502", + "503", + "504", + "505", + "506", + "509" + ], + "5": [ + "103", + "104", + "106", + "109", + "203", + "204", + "205", + "206", + "209", + "302", + "303", + "304", + "305", + "306", + "309", + "402", + "403", + "404", + "405", + "406", + "409", + "502", + "503", + "504", + "505", + "506", + "509" + ] + } + }, + 
"TuanJie": { + "am12": { + "1": [ + "\u56e2\u9636\u4e09", + "\u56e2\u9636\u56db", + "\u56e2\u62a5\u544a\u5385" + ], + "2": [ + "\u56e2\u9636\u4e8c", + "\u56e2\u9636\u56db", + "\u56e2\u9636\u4e03", + "\u56e2\u9636\u516b", + "\u56e2\u62a5\u544a\u5385" + ], + "3": [ + "\u56e2\u9636\u4e09", + "\u56e2\u9636\u4e94", + "\u56e2\u9636\u516b", + "\u56e2\u62a5\u544a\u5385" + ], + "4": [ + "\u56e2\u9636\u4e09", + "\u56e2\u9636\u4e94", + "\u56e2\u9636\u4e03" + ], + "5": [ + "\u56e2\u9636\u4e09", + "\u56e2\u9636\u4e94", + "\u56e2\u9636\u516d", + "\u56e2\u9636\u4e03", + "\u56e2\u62a5\u544a\u5385" + ] + }, + "am34": { + "1": [ + "\u56e2\u9636\u4e00", + "\u56e2\u9636\u4e94" + ], + "2": [ + "\u56e2\u9636\u4e8c", + "\u56e2\u9636\u4e03", + "\u56e2\u9636\u516b", + "\u56e2\u62a5\u544a\u5385" + ], + "3": [ + "\u56e2\u9636\u4e8c", + "\u56e2\u9636\u4e09", + "\u56e2\u9636\u4e94", + "\u56e2\u9636\u516b" + ], + "4": [ + "\u56e2\u9636\u4e09", + "\u56e2\u9636\u4e94", + "\u56e2\u9636\u4e03" + ], + "5": [ + "\u56e2\u9636\u4e00", + "\u56e2\u9636\u4e94", + "\u56e2\u9636\u4e03", + "\u56e2\u62a5\u544a\u5385" + ] + }, + "pm12": { + "1": [ + "\u56e2\u9636\u516d", + "\u56e2\u9636\u4e03", + "\u56e2\u9636\u516b", + "\u56e2\u62a5\u544a\u5385" + ], + "2": [ + "\u56e2\u9636\u4e8c", + "\u56e2\u9636\u56db" + ], + "3": [ + "\u56e2\u9636\u4e00", + "\u56e2\u9636\u4e8c", + "\u56e2\u9636\u4e09", + "\u56e2\u9636\u56db", + "\u56e2\u9636\u4e94", + "\u56e2\u9636\u516d", + "\u56e2\u9636\u4e03", + "\u56e2\u9636\u516b", + "\u56e2\u62a5\u544a\u5385" + ], + "4": [ + "\u56e2\u9636\u4e00", + "\u56e2\u9636\u516d", + "\u56e2\u9636\u4e03" + ], + "5": [ + "\u56e2\u9636\u4e00", + "\u56e2\u9636\u4e94", + "\u56e2\u9636\u4e03", + "\u56e2\u62a5\u544a\u5385" + ] + }, + "pm34": { + "1": [ + "\u56e2\u9636\u4e8c", + "\u56e2\u9636\u516d", + "\u56e2\u9636\u4e03", + "\u56e2\u9636\u516b", + "\u56e2\u62a5\u544a\u5385" + ], + "2": [ + "\u56e2\u9636\u4e8c", + "\u56e2\u9636\u56db" + ], + "3": [ + "\u56e2\u9636\u4e00", + "\u56e2\u9636\u4e8c", + 
"\u56e2\u9636\u4e09", + "\u56e2\u9636\u56db", + "\u56e2\u9636\u4e94", + "\u56e2\u9636\u516d", + "\u56e2\u9636\u4e03", + "\u56e2\u9636\u516b", + "\u56e2\u62a5\u544a\u5385" + ], + "4": [ + "\u56e2\u9636\u4e00", + "\u56e2\u9636\u4e8c", + "\u56e2\u9636\u516d", + "\u56e2\u9636\u4e03" + ], + "5": [ + "\u56e2\u9636\u4e00", + "\u56e2\u9636\u4e09", + "\u56e2\u9636\u4e94", + "\u56e2\u9636\u516d", + "\u56e2\u9636\u4e03", + "\u56e2\u9636\u516b", + "\u56e2\u62a5\u544a\u5385" + ] + } + } +} \ No newline at end of file diff --git a/data/mobilizeBorrow.json b/data/mobilizeBorrow.json index 8b13789..ecefd0e 100644 --- a/data/mobilizeBorrow.json +++ b/data/mobilizeBorrow.json @@ -1 +1,266 @@ - +{ + "mobilize":{ + "zhuJian":{ + "101":[], + "102":[], + "104":[], + "105":[], + "106":[], + "108":[], + "110":[], + "111":[], + "112":[], + "201":[], + "202":[], + "204":[], + "205":[], + "206":[], + "207":[], + "208":[], + "210":[], + "301":[], + "302":[], + "303":[], + "304":[], + "305":[], + "306":[], + "307":[], + "308":[], + "309":[], + "401":[], + "402":[], + "403":[], + "404":[], + "405":[], + "406":[], + "407":[], + "408":[], + "409":[], + "410":[], + "411":[], + "501":[], + "502":[], + "503":[], + "504":[], + "505":[], + "506":[], + "507":[], + "508":[], + "509":[], + "510":[], + "511":[] + }, + "zhongLou":{ + "103":[], + "104":[], + "107":[], + "110":[], + "112":[], + "113":[], + "203":[], + "204":[], + "205":[], + "206":[], + "207":[], + "208":[], + "210":[], + "211":[], + "303":[], + "304":[], + "305":[], + "306":[], + "307":[], + "308":[], + "407":[], + "408":[], + "503":[], + "504":[], + "505":[], + "506":[], + "507":[], + "510":[], + "603":[], + "607":[], + "703":[], + "704":[], + "705":[], + "707":[], + "708":[] + }, + "XiPei":{ + "102":[], + "103":[], + "104":[], + "105":[], + "106":[], + "109":[], + "202":[], + "203":[], + "204":[], + "205":[], + "206":[], + "209":[], + "302":[], + "303":[], + "304":[], + "305":[], + "306":[], + "309":[], + "402":[], + "403":[], + "404":[], 
+ "405":[], + "406":[], + "409":[], + "502":[], + "503":[], + "504":[], + "505":[], + "506":[], + "509":[] + }, + "TuanJie":{ + "团阶一":[], + "团阶二":[], + "团阶三":[], + "团阶四":[], + "团阶五":[], + "团阶六":[], + "团阶七":[], + "团阶八":[], + "团报告厅":[] + } + }, + "borrow":{ + "zhuJian":{ + "101":[], + "102":[], + "104":[], + "105":[], + "106":[], + "108":[], + "110":[], + "111":[], + "112":[], + "201":[], + "202":[], + "204":[], + "205":[], + "206":[], + "207":[], + "208":[], + "210":[], + "301":[], + "302":[], + "303":[], + "304":[], + "305":[], + "306":[], + "307":[], + "308":[], + "309":[], + "401":[], + "402":[], + "403":[], + "404":[], + "405":[], + "406":[], + "407":[], + "408":[], + "409":[], + "410":[], + "411":[], + "501":[], + "502":[], + "503":[], + "504":[], + "505":[], + "506":[], + "507":[], + "508":[], + "509":[], + "510":[], + "511":[] + }, + "zhongLou":{ + "103":[], + "104":[], + "107":[], + "110":[], + "112":[], + "113":[], + "203":[], + "204":[], + "205":[], + "206":[], + "207":[], + "208":[], + "210":[], + "211":[], + "303":[], + "304":[], + "305":[], + "306":[], + "307":[], + "308":[], + "407":[], + "408":[], + "503":[], + "504":[], + "505":[], + "506":[], + "507":[], + "510":[], + "603":[], + "607":[], + "703":[], + "704":[], + "705":[], + "707":[], + "708":[] + }, + "XiPei":{ + "102":[], + "103":[], + "104":[], + "105":[], + "106":[], + "109":[], + "202":[], + "203":[], + "204":[], + "205":[], + "206":[], + "209":[], + "302":[], + "303":[], + "304":[], + "305":[], + "306":[], + "309":[], + "402":[], + "403":[], + "404":[], + "405":[], + "406":[], + "409":[], + "502":[], + "503":[], + "504":[], + "505":[], + "506":[], + "509":[] + }, + "TuanJie":{ + "团阶一":[], + "团阶二":[], + "团阶三":[], + "团阶四":[], + "团阶五":[], + "团阶六":[], + "团阶七":[], + "团阶八":[], + "团报告厅":[] + } + } +} \ No newline at end of file diff --git a/dockerfile b/dockerfile new file mode 100644 index 0000000..ae9a3af --- /dev/null +++ b/dockerfile @@ -0,0 +1,16 @@ + +FROM sanicframework/sanic:3.8-latest + 
+WORKDIR /queryPPSUCClassRoomSpider + +COPY . . + +RUN apk add --update --no-cache g++ gcc libxslt-dev python3-dev openssl-dev + +RUN apk add --no-cache gcc musl-dev libxslt-dev + +RUN pip install -r requirements.txt -i https://pypi.tuna.tsinghua.edu.cn/simple + +EXPOSE 8000 + +CMD ["python", "server.py"] diff --git a/main.py b/main.py deleted file mode 100644 index fd191c6..0000000 --- a/main.py +++ /dev/null @@ -1,52 +0,0 @@ -import requests -import http.cookiejar -import time -from requests.api import get - -from networkAppClass.login import Login -from networkAppClass.get_urldata import GetUrlData -from networkAppClass.computed_week import GetWeek -from networkAppClass.create_url_pool import CreateUrlPool -from networkAppClass.get_html import GetHtmlBus -from utils import loadJson -""" vpn_timestamp的单位是纳秒 有效时间应该是1h - 也有可能是时间戳,13位时间戳 - wrdrecordvisit要比vpn_timestamp早一些 - 没有wrdrecordvisit参数,也能请求到数据 """ - -startCpu = time.perf_counter() -start = time.time() - -# 全局变量,保存着登录后的cookie信息 -session = requests.Session() -session.cookies = http.cookiejar.LWPCookieJar('cookie') - -""" 设置登录用方法所需Headers参数 """ -headers = loadJson('config/requestConfig.json')["headers"] -session.headers = headers -# 实例化登录器,触发登录操作 -# 更新session,保存更新后响应值 -login = Login(session) -session = login.session -loginResponse = login.response - -# 获取四位一体课表链接 -swytUrlText = GetUrlData(headers, session).getSwytUrl() -# 获取教务处链接 -jwcUrlText = GetUrlData(headers, session).getJwcUrl() -# 获取教学周 -computedWeek = GetWeek(session, headers, jwcUrlText) -week = computedWeek.getFirstDay() - -# # 批量创建链接池 -createUrlPool = CreateUrlPool(swytUrlText) -createUrlPool.createUrlObject() -urlPools = createUrlPool.urlPoolResult -# # 启动多线程并发控制 -getHtmlBus = GetHtmlBus(headers, session) -getHtmlBus.bus(urlPools, week) - -end = time.time() -endCpu = time.perf_counter() -print('程序执行时间: ',end - start) -print('CPU执行时间: ',endCpu - startCpu) \ No newline at end of file diff --git a/networkAppClass/computed_week.py 
b/networkAppClass/computed_week.py index 4a412e2..617a313 100644 --- a/networkAppClass/computed_week.py +++ b/networkAppClass/computed_week.py @@ -3,7 +3,7 @@ import math class GetWeek: - def __init__(self, session, headers, jwcUrlText): + def __init__(self, session, headers, jwcUrlText=""): self.jwcUrl = 'https://webvpn.ppsuc.edu.cn%s' % (jwcUrlText) # 将传入的Headers和Session进行保存 self.headers = headers @@ -36,5 +36,21 @@ def computedWeek(self): initFirstDifference = 0 if initFirstDifference==0 else 7 - initFirstDifference firstNowDifference = (nowDate - firstDate).days - initFirstDifference if firstNowDifference<0: return 0 - week = math.ceil(firstNowDifference / 7) + 1 + week = math.ceil(firstNowDifference / 7) + 1 # 师兄写错哩 + return week + + @staticmethod + def manual_getweek(FirstDate: str): + """依赖管理员输入从而手动获取和计算当前教学周""" + # 初始化变量类型为datetime + initDate = datetime.datetime.strptime('2021-8-23', "%Y-%m-%d") + firstDate = datetime.datetime.strptime(FirstDate, "%Y-%m-%d") + nowDate = datetime.datetime.today() + # nowDate = datetime.datetime.strptime('2022-11-9', "%Y-%m-%d") + # 开始计算 + initFirstDifference = (firstDate - initDate).days % 7 + initFirstDifference = 0 if initFirstDifference==0 else 7 - initFirstDifference + firstNowDifference = (nowDate - firstDate).days - initFirstDifference + if firstNowDifference<0: return 0 + week = math.floor(firstNowDifference / 7) + 1 return week \ No newline at end of file diff --git a/networkAppClass/create_url_pool.py b/networkAppClass/create_url_pool.py index 553af79..807be25 100644 --- a/networkAppClass/create_url_pool.py +++ b/networkAppClass/create_url_pool.py @@ -1,18 +1,21 @@ from urllib import parse import sys + sys.path.append(".") -from utils import loadJson +from utils import loadJson, encodeGBK # 拼接字符串模板 -ZHUJIAN = "{}?{}&jxcdmc=%27%CD%C5%D6%FD%BD%A3%C2%A5{}%27" -ZHONG = "{}?{}&jxcdmc=%27%CD%C5%D3%FD%BE%AF%D6%D0%C2%A5{}%27" -XIPEI = "{}?{}&jxcdmc=%27%CD%C5%D3%FD%BE%AF%CE%F7%C2%A5{}%27" +ZHUJIAN = "{}?{}&jxcdmc=%27" + 
encodeGBK("团铸剑楼") + "{}%27" +ZHONG = "{}?{}&jxcdmc=%27" + encodeGBK("团育警中楼") + "{}%27" +XIPEI = "{}?{}&jxcdmc=%27" + encodeGBK("团育警西楼") + "{}%27" +TUANJIE = "{}?{}&jxcdmc=%27{}%27" # 拼接字符串具体填充对象 PARSEURLENCODE = { - "zhuJian": ZHUJIAN, - "zhongLou": ZHONG, - "XiPei": XIPEI + "zhuJian": ZHUJIAN, + "zhongLou": ZHONG, + "XiPei": XIPEI, + "TuanJie": TUANJIE } # 2021-9-12更新 @@ -20,6 +23,7 @@ OLDPHP = "index.php" NEWPHP = "jxcdkbcx.php" + # 批量创建链接池 class CreateUrlPool: def __init__(self, swytUrlText: str = ''): @@ -37,13 +41,22 @@ def createUrlObject(self): for classRoomName in self.classRoomNameList: classRoomNumList = self.classRoomsNumLists[classRoomName] for classRoomNum in classRoomNumList: - url: str = ( - PARSEURLENCODE[classRoomName].format( - self.swytUrlText.replace(OLDPHP, NEWPHP),# 待修改 - parse.urlencode(self.createUrlDic), - classRoomNum + if classRoomName != "TuanJie": + url: str = ( + PARSEURLENCODE[classRoomName].format( + self.swytUrlText.replace(OLDPHP, NEWPHP), # 待修改 + parse.urlencode(self.createUrlDic), + classRoomNum + ) + ) + else: # 对于团阶的数据特殊处理 + url: str = ( + PARSEURLENCODE[classRoomName].format( + self.swytUrlText.replace(OLDPHP, NEWPHP), # 待修改 + parse.urlencode(self.createUrlDic), + encodeGBK(classRoomNum) + ) ) - ) # 拼接为完整的Url链接 urlReferer = self.requestsUA["headers"]["Referer"][0:-1] url = urlReferer + url @@ -55,4 +68,7 @@ def createUrlObject(self): } self.urlPoolResult.append(urlObject) # uid自增1 - uid = uid +1 \ No newline at end of file + uid = uid + 1 + +# https://webvpn.ppsuc.edu.cn/http/77726476706e69737468656265737421a1a510d276693c1e2c59dae2c90476/swyt/jxcdkbcx.php?xnxq=%222021-20221%22&jxcdmc=%27%CD%C5%D6%FD%BD%A3%C2%A5101%27 +# https://webvpn.ppsuc.edu.cn/http/77726476706e69737468656265737421a1a510d276693c1e2c59dae2c90476/swyt/jxcdkbcx.php?xnxq= %222021-20221%22 &jxcdmc= %27%CD%C5%D3%FD%BE%AF%CE%F7%C2%A5509%27 diff --git a/networkAppClass/get_html.py b/networkAppClass/get_html.py index 204ce9a..eb15831 100644 --- 
a/networkAppClass/get_html.py +++ b/networkAppClass/get_html.py @@ -3,9 +3,11 @@ import functools from .process_text import ProcessText + # requests会阻塞asyncio循环 class GetHtmlBus: """ 异步爬取网页文本总线控制器 """ + def __init__(self, headers, session): # 将传入的Headers和Session进行保存 self.requestsUA = headers @@ -27,11 +29,11 @@ def bus(self, urlPools, week): self.processText.processTextSave() async def send(self, - requestsObject={ - "uid": "-1", - "classRoomName": "None", - "classRoomNum": "-1", - "url": "None"}): + requestsObject={ + "uid": "-1", + "classRoomName": "None", + "classRoomNum": "-1", + "url": "None"}): """ 发送网络请求,并返回抓取到的响应内容 """ # 自定义响应对象:responseObject # responseObject = {"uid":链接Id, "classRoomName":教学楼名, "classRoomNum":教室, "content":抓取文本} @@ -39,26 +41,28 @@ async def send(self, "uid": requestsObject["uid"], "classRoomName": requestsObject["classRoomName"], "classRoomNum": requestsObject["classRoomNum"], - "content": '' + "content": '', + "url": requestsObject['url'] } try: # 利用BaseEventLoop.run_in_executor()可以在coroutine中执行第三方的命令,例如requests.get() # 第三方命令的参数与关键字利用functools.partial传入 loop = asyncio.get_event_loop() - future = loop.run_in_executor( None, - functools.partial( - self.session.get, - url = requestsObject["url"], - headers = self.requestsUA - ) - ) + future = loop.run_in_executor(None, + functools.partial( + self.session.get, + url=requestsObject["url"], + headers=self.requestsUA + ) + ) response = await future # 要设置响应包的编码格式为gbk,不然会乱码!!! 
response.encoding = "gbk" # HTML内容 - content = response.text + content = response.text responseObject["content"] = content self.processText.processTextContent(responseObject) - except: - pass \ No newline at end of file + except Exception as e: + print(e) + raise e diff --git a/networkAppClass/get_urldata.py b/networkAppClass/get_urldata.py index ad548b4..1f5ca86 100644 --- a/networkAppClass/get_urldata.py +++ b/networkAppClass/get_urldata.py @@ -3,7 +3,6 @@ class GetUrlData: # 四位一体课表链接所在path # swytUrlPath = '//*[@id="group-2"]/div[9]/div/div[3]/@data-redirect' - def __init__(self, headers, session): # 将传入的Headers和Session进行保存 self.headers = headers diff --git a/networkAppClass/login.py b/networkAppClass/login.py index 876e3f6..65e45ef 100644 --- a/networkAppClass/login.py +++ b/networkAppClass/login.py @@ -1,9 +1,11 @@ from lxml import etree from requests.sessions import session import sys + sys.path.append(".") from utils import loadJson + class Login: # 验证码图片ID所在path captchaPath = '//*[@id="captcha-wrap"]/div/div/input[1]/@value' @@ -18,14 +20,15 @@ def __init__(self, session): self.session = session self.response = '' self.doLogin() - + """ 登录注册时, 需要发送的验证码随机参数 解析Html节点内容,返回str类型的参数 """ + def getSetCaptcha(self): # 设置请求头参数并请求网页 url = 'https://webvpn.ppsuc.edu.cn/login' - response = self.session.get(url, headers = self.headers) + response = self.session.get(url, headers=self.headers) # HTML内容 html = response.text htmlContent = etree.HTML(html) @@ -34,9 +37,10 @@ def getSetCaptcha(self): self.loginData['captcha_id'] = captchaText """ 使用账号密码进行登录 """ + def doLogin(self): # 更新随机码参数 self.getSetCaptcha() url = 'https://webvpn.ppsuc.edu.cn/do-login' - response = self.session.post(url, data = self.loginData, headers = self.headers) - self.response = response \ No newline at end of file + response = self.session.post(url, data=self.loginData, headers=self.headers) + self.response = response diff --git a/networkAppClass/process_text.py b/networkAppClass/process_text.py 
index 62b5deb..1b22f81 100644 --- a/networkAppClass/process_text.py +++ b/networkAppClass/process_text.py @@ -1,13 +1,16 @@ +from dataclasses import dataclass from lxml import etree from interval import Interval -from datetime import datetime # 导入datetime模块 +import datetime # 导入datetime模块 import re import json import sys + sys.path.append(".") from utils import loadJson -class ProcessText(): + +class ProcessText: def __init__(self, week): self.week = week # 写入json的文本变量,初始化模板 @@ -17,49 +20,80 @@ def __init__(self, week): def processTextSave(self): jsonName = "./data/classRoomData.json" - jsondata = json.dumps(self.classRoomDataJsonText, indent=4, separators=(',', ': '))# json格式美化写入 - writeFile = open(jsonName,'w', encoding='utf-8') + jsondata = json.dumps(self.classRoomDataJsonText, indent=4, separators=(',', ': ')) # json格式美化写入 + writeFile = open(jsonName, 'w', encoding='utf-8') writeFile.write(jsondata) writeFile.close() jsonName = "./data/mobilizeBorrow.json" # 加入 ensure_ascii=False 选项。导出json文件不乱码 - jsondata = json.dumps(self.mobilizeBorrowJsonText, indent=4, ensure_ascii=False, separators=(',',':')) - writeFile = open(jsonName,'w', encoding='utf-8') + jsondata = json.dumps(self.mobilizeBorrowJsonText, indent=4, ensure_ascii=False, separators=(',', ':')) + writeFile = open(jsonName, 'w', encoding='utf-8') writeFile.write(jsondata) writeFile.close() - + def processTextContent(self, responseObject): + if responseObject["classRoomName"] == "TuanJie": + print("::debug::") htmlContent = etree.HTML(responseObject["content"]) pathCount = 0 dayCount = 1 + print("URL:{}".format(responseObject["url"])) + print("爬取课表:") for path in self.pathPool: pathTemp = htmlContent.xpath(path) - pathFlag = 1# 默认置1,代表有课 - #检测文本 - if len(pathTemp)==0:# 如果该节点中长度为0,则说明没有课。 + pathFlag = 1 # 默认置1,代表有课 + # 检测文本 + print("教室:{0}{1} pathCount={2} dayCount={3} 周{3}".format(responseObject["classRoomName"], + responseObject["classRoomNum"], pathCount, + dayCount), end="") + if len(pathTemp) == 0: # 
如果该节点中长度为0,则说明没有课。 pathFlag = 0 - else:# 长度不为0,说明有课。结合具体的教学周,查询本教室当前教学周是否有课 + else: # 长度不为0,说明有课。结合具体的教学周,查询本教室当前教学周是否有课 pathTemp = max(pathTemp, key=len, default='') pathFlag = self.RegStr(pathTemp) - - #append(0)为占位符,表示有课 - if 0<=pathCount<5: - if pathFlag==0:# pathFlag为0,代表无课 - self.classRoomDataJsonText[responseObject["classRoomName"]]["am12"][str(dayCount)].append(int(responseObject["classRoomNum"])) - elif 5<=pathCount<10: - if pathFlag==0: - self.classRoomDataJsonText[responseObject["classRoomName"]]["am34"][str(dayCount)].append(int(responseObject["classRoomNum"])) - elif 10<=pathCount<15: - if pathFlag==0: - self.classRoomDataJsonText[responseObject["classRoomName"]]["pm12"][str(dayCount)].append(int(responseObject["classRoomNum"])) - elif 15<=pathCount<20: - if pathFlag==0: - self.classRoomDataJsonText[responseObject["classRoomName"]]["pm34"][str(dayCount)].append(int(responseObject["classRoomNum"])) - + + # append(0)为占位符,表示有课 + if 0 <= pathCount < 5: + if pathFlag == 0: # pathFlag为0,代表无课 + """self.classRoomDataJsonText[responseObject["classRoomName"]]["am12"][str(dayCount)].append( + int(responseObject["classRoomNum"]))""" + self.classRoomDataJsonText[responseObject["classRoomName"]]["am12"][str(dayCount)].append( + (responseObject["classRoomNum"])) + print("上午12节无课") + else: + print("上午12节有课") + elif 5 <= pathCount < 10: + if pathFlag == 0: + """self.classRoomDataJsonText[responseObject["classRoomName"]]["am34"][str(dayCount)].append( + int(responseObject["classRoomNum"]))""" + self.classRoomDataJsonText[responseObject["classRoomName"]]["am34"][str(dayCount)].append( + (responseObject["classRoomNum"])) + print("上午34节无课") + else: + print("上午34节有课") + elif 10 <= pathCount < 15: + if pathFlag == 0: + """self.classRoomDataJsonText[responseObject["classRoomName"]]["pm12"][str(dayCount)].append( + int(responseObject["classRoomNum"]))""" + self.classRoomDataJsonText[responseObject["classRoomName"]]["pm12"][str(dayCount)].append( + 
(responseObject["classRoomNum"])) + print("下午12节无课") + else: + print("下午12节有课") + elif 15 <= pathCount < 20: + if pathFlag == 0: + """self.classRoomDataJsonText[responseObject["classRoomName"]]["pm34"][str(dayCount)].append( + int(responseObject["classRoomNum"]))""" + self.classRoomDataJsonText[responseObject["classRoomName"]]["pm34"][str(dayCount)].append( + (responseObject["classRoomNum"])) + print("下午34节无课") + else: + print("下午34节有课") + pathCount += 1 dayCount += 1 - dayCount = dayCount if dayCount< 5 else 1 + dayCount = dayCount if dayCount <= 5 else 1 # 调停课信息处理,当小于当前教学周时,不将其记录。 pathMobilize = ".//div[@class='row-fluid sortable'][2] \ /div[@class='box span12']/div[@class='box-content'] \ @@ -75,26 +109,31 @@ def processTextContent(self, responseObject): # 1、需要记录数据如下: for index in range(mobilizeTimes): pathContent = htmlContent.xpath(pathMobilize)[index] - className = pathContent[4][0].xpath('string(.)') # 课程名字 - classes = pathContent[7].xpath('string(.)') # 调课类别 - oldDate = pathContent[8].xpath('string(.)') # 原上课日期 - oldTimes = pathContent[11].xpath('string(.)') # 原节次 - oldRoom = pathContent[12][0].xpath('string(.)') # 原教室 - newDate = '' # 置空 - newTimes = '' # 置空 - newRoom = '' # 置空 - + className = pathContent[4][0].xpath('string(.)') # 课程名字 + classes = pathContent[7].xpath('string(.)') # 调课类别 + oldDate = pathContent[8].xpath('string(.)') # 原上课日期 + oldTimes = pathContent[11].xpath('string(.)') # 原节次 + oldRoom = pathContent[12][0].xpath('string(.)') # 原教室 + newDate = '' # 置空 + newTimes = '' # 置空 + newRoom = '' # 置空 + if classes == "停课" and datetime.datetime.today() > datetime.datetime.strptime(str(oldDate), "%Y-%m-%d"): + continue + # 如果当前停课的课程日期比当前日期早,那就不记录了 if classes != '停课': # 原教学周索引为9,现教学周索引为15 oldWeek = int(pathContent[9].xpath('string(.)')) newWeek = int(pathContent[15].xpath('string(.)')) # 检测原教学周与现教学周若早于当前教学周,直接跳过该组数据。 if (self.week >= max(oldWeek, newWeek)): continue - newDate = pathContent[14].xpath('string(.)') # 现上课日期 - newTimes = 
pathContent[17].xpath('string(.)') # 现节次 - newRoom = pathContent[18][0].xpath('string(.)') # 现教室 + newDate = pathContent[14].xpath('string(.)') # 现上课日期 + newTimes = pathContent[17].xpath('string(.)') # 现节次 + newRoom = pathContent[18][0].xpath('string(.)') # 现教室 + if datetime.datetime.today() > datetime.datetime.strptime(str(newDate), "%Y-%m-%d"): + continue + # 如果当前换课的课程日期比当前日期早,那就不记录了 self.mobilizeBorrowJsonText["mobilize"][responseObject["classRoomName"]][responseObject["classRoomNum"]] \ - .append({ + .append({ 'className': className, \ 'classes': classes, \ 'oldDate': oldDate, \ @@ -104,33 +143,42 @@ def processTextContent(self, responseObject): 'newTimes': newTimes, \ 'newRoom': newRoom} ) - + dateMode = re.compile("\d+-\d+-\d+") + dateMode1 = re.compile("\d{4,}\d{2,}\d{2,}") for index in range(borrowTimes): pathContent = htmlContent.xpath(pathBorrow)[index] - if pathContent[11].xpath('string(.)') == '否': continue # 若借用申请未通过审核,则跳过。 - borrowDate = pathContent[4].xpath('string(.)') # 借用日期 - borrowTime = pathContent[5].xpath('string(.)') # 借用时间 - borrowReason = pathContent[6].xpath('string(.)') # 借用事由 - if ( (datetime.strptime(re.findall(r"(.+?)(",borrowDate)[0],'%Y-%m-%d') - datetime.today() ).days < 0 ): - continue # 当借用日期已过期,则将其跳过。 + if pathContent[11].xpath('string(.)') == '否': continue # 若借用申请未通过审核,则跳过。 + borrowDate = pathContent[4].xpath('string(.)') # 借用日期 + borrowTime = pathContent[5].xpath('string(.)') # 借用时间 + borrowReason = pathContent[6].xpath('string(.)') # 借用事由 + if '/' in borrowDate: + borrowDate = str(borrowDate).replace("/", '-') + standardDate = dateMode.findall(borrowDate) + if len(standardDate) > 0: + standardDate = standardDate[0] + else: + standardDate = str(dateMode1.findall(borrowDate)[0]) + standardDate = standardDate[0:4] + '-' + standardDate[4:6] + '-' + standardDate[6:8] + if ((datetime.datetime.strptime(standardDate, '%Y-%m-%d') - datetime.datetime.today()).days < 0): + continue # 当借用日期已过期,则将其跳过。 
self.mobilizeBorrowJsonText["borrow"][responseObject["classRoomName"]][responseObject["classRoomNum"]] \ - .append({ + .append({ 'borrowDate': borrowDate, \ 'borrowTime': borrowTime, \ - 'borrowReason': borrowReason} + 'borrowReason': borrowReason} ) def RegStr(self, string): ''' 判断当前教学周 ''' Reg1 = r'\d-\d\d' Reg2 = r'\d-\d' - if re.search(Reg1,string) is None: - it = re.search(Reg2,string) + if re.search(Reg1, string) is None: + it = re.search(Reg2, string) else: - it = re.search(Reg1,string) + it = re.search(Reg1, string) numberList = str(it.group()).split('-') for i in range(len(numberList)): numberList[i] = int(numberList[i]) - #若有课,返回1;无课,返回0 - return 1 if self.week in Interval(numberList[0],numberList[1]) else 0 \ No newline at end of file + # 若有课,返回1;无课,返回0 + return 1 if self.week in Interval(numberList[0], numberList[1]) else 0 diff --git a/refreshClassroomData.py b/refreshClassroomData.py new file mode 100644 index 0000000..bdbe06a --- /dev/null +++ b/refreshClassroomData.py @@ -0,0 +1,59 @@ +import requests +import http.cookiejar +import time +from requests.api import get + +from networkAppClass.login import Login +from networkAppClass.get_urldata import GetUrlData +from networkAppClass.computed_week import GetWeek +from networkAppClass.create_url_pool import CreateUrlPool +from networkAppClass.get_html import GetHtmlBus +from utils import loadJson + +def Work(): + """ vpn_timestamp的单位是纳秒 有效时间应该是1h + 也有可能是时间戳,13位时间戳 + wrdrecordvisit要比vpn_timestamp早一些 + 没有wrdrecordvisit参数,也能请求到数据 """ + + startCpu = time.perf_counter() # perf_counter()返回当前的计算机系统时间 + start = time.time() # time time() 返回当前时间的时间戳(1970纪元后经过的浮点秒数)。 + + # 全局变量,保存着登录后的cookie信息 + session = requests.Session() # requests.session():维持会话,可以让我们在跨请求时保存某些参数 + session.cookies = http.cookiejar.LWPCookieJar('cookie') # cookie相关 + + """ 设置登录用方法所需Headers参数 """ + headers = loadJson('config/requestConfig.json')["headers"] + session.headers = headers + # 实例化登录器,触发登录操作 + # 更新session,保存更新后响应值 + login = 
Login(session) # 登陆 + session = login.session + loginResponse = login.response + + # 获取四位一体课表链接 + # swytUrlText = GetUrlData(headers, session).getSwytUrl() + swytUrlText = "/http/77726476706e69737468656265737421a1a510d276693c1e2c59dae2c90476/swyt/index.php" + # 获取教务处链接 + # jwcUrlText = GetUrlData(headers, session).getJwcUrl() + # 获取教学周 + computedWeek = GetWeek(session, headers) + week = computedWeek.manual_getweek('2022-9-5') # 这里需要手动输入一下这学期的第一天,然后就可以算出当前的周数了 + + # # 批量创建链接池 + createUrlPool = CreateUrlPool(swytUrlText) + createUrlPool.createUrlObject() + urlPools = createUrlPool.urlPoolResult + # # 启动多线程并发控制 + getHtmlBus = GetHtmlBus(headers, session) + getHtmlBus.bus(urlPools, week) + + end = time.time() + endCpu = time.perf_counter() + print('程序执行时间: ', end - start) + print('CPU执行时间: ', endCpu - startCpu) + + +if __name__ == '__main__': + Work() diff --git a/server.py b/server.py new file mode 100644 index 0000000..6ab00af --- /dev/null +++ b/server.py @@ -0,0 +1,73 @@ +from sanic import Sanic, request +from sanic.response import json, text +from utils import loadJson +import refreshClassroomData +app = Sanic("MyHelloWorldApp") +ClassRoomData = loadJson("data/classRoomData.json") +BuildingConfig = loadJson("config/classRoomNumConfig.json")["classRoomNum"] +BuildingRoom = ["zhuJian", "zhongLou", "XiPei"] + +urlMap0 = {"zj": "zhuJian", "zl": "zhongLou", "xp": "XiPei", "tj": "TuanJie"} +urlMap1 = {"zhuJian": "zj", "zhongLou": "zl", "XiPei": "xp", "TuanJie": "tj"} +urlMap2 = {"tj1" : "团阶一", "tj2": "团阶二","tj3": "团阶三","tj4": "团阶四", + "tj5": "团阶五", + "tj6": "团阶六", + "tj7": "团阶七", + "tj8": "团阶八", + "tj9":"团报告厅"} +# url缩写和楼名称映射一下 + + +@app.get("/") +async def getAllMessage(request): + return json(ClassRoomData) + + +@app.get("/updateData") +async def getAllMessage(request): + try: + refreshClassroomData.Work() + return text("NO ERROR OCCURRED") + except Exception as e: + return json(e) + + +@app.get("/detail/Class") +async def getDetailedMessage(request: request.Request): 
+ args = request.args + print(args) + try: + buildingName, Time, date = args['bd'][0], args['t'][0], args['dt'][0] + return json(ClassRoomData[urlMap0[buildingName]][Time][date]) + except Exception as e: + return json({e}) + + +@app.get("/detail/mobilize") +async def getDetailedMessageForMobilize(request: request.Request): + args = request.args + print(args) + try: + buildingName, room = args['bd'][0], args['rm'][0] + if buildingName == "tj": + room = urlMap2[room] + return json(loadJson("data/mobilizeBorrow.json")["mobilize"][urlMap0[buildingName]][room]) + except Exception as e: + return json({e}) + + +@app.get("/detail/borrow") +async def getDetailedMessageForBorrow(request: request.Request): + args = request.args + print(args) + try: + buildingName, room = args['bd'][0], args['rm'][0] + if buildingName == "tj": + room = urlMap2[room] + return json(loadJson("data/mobilizeBorrow.json")["borrow"][urlMap0[buildingName]][room]) + except Exception as e: + return json({e}) + + +if __name__ == '__main__': + app.run(host='0.0.0.0', port=8000) \ No newline at end of file diff --git a/user.json b/user.json index 0b26f3e..1562807 100644 --- a/user.json +++ b/user.json @@ -1,8 +1,8 @@ { "auth_type": "local", - "username": "", + "username": "202121460037", "sms_code": "", - "password": "", + "password": "gadx0039", "captcha": "", "needCaptcha": "false", "captcha_id": "" diff --git a/utils.py b/utils.py index 72bbbdb..a49fa50 100644 --- a/utils.py +++ b/utils.py @@ -4,8 +4,22 @@ def loadJson(fileNamePath): """ 注意,这里传入的fileNamePath相对路径,以本方法所在文件为基准 """ try: - with open(fileNamePath,'r',encoding='utf8')as(jsonFile): + with open(fileNamePath, 'r', encoding='utf8') as (jsonFile): json_data = json.load(jsonFile) return json_data except Exception as e: - pass \ No newline at end of file + pass + + +def encodeGBK(verse: str): + """ + Output Eg: %27%CD%C5%BD%D7%D2%BB%27 + """ + byte = verse.encode('GBK') + res = str(byte)[2:-1].replace("\\x", "%").upper() + return res + + +if __name__ 
== '__main__': + encodeGBK("团阶一") # output: %CD%C5%BD%D7%D2%BB + encodeGBK("'团报告厅'")