# Get the issueNumber
def get_issueNumber(conferenceID, timeout=30):
    """
    Get the issueNumber from the website.

    Requests the IEEE Xplore publication-metadata REST endpoint for the given
    publication id and reads ``currentIssue -> issueNumber`` from the JSON
    reply.

    Args:
        conferenceID: Publication/conference id. It is converted with
            ``str()`` before being placed in the URL and the Referer header.
        timeout: Seconds to wait for the server, forwarded to
            ``requests.get``. Without a timeout a stalled connection would
            block the calling thread indefinitely.

    Returns:
        The issue number as a string.

    Raises:
        requests.exceptions.Timeout: The server did not answer in time.
        json.JSONDecodeError: The response body is not valid JSON.
        KeyError: The reply has no ``currentIssue`` / ``issueNumber`` entry.
    """
    conferenceID = str(conferenceID)
    gheaders = {
        'Referer': f'https://ieeexplore.ieee.org/xpl/conhome/{conferenceID}/proceeding',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.97 Safari/537.36',
    }
    md_url = f'https://ieeexplore.ieee.org/rest/publication/home/metadata?pubid={conferenceID}'
    # Bound the wait so a hung connection surfaces as an exception instead of
    # freezing the caller forever.
    md_res = requests.get(md_url, headers=gheaders, timeout=timeout)
    md_dic = json.loads(md_res.text)
    return str(md_dic['currentIssue']['issueNumber'])
# Crawl the papers and their download links
def get_article_info(conferenceID, saveFileName):
    """
    Collect the published paper data, and save into the csv file "saveFileName".

    NOTE(review): the statements visible in this block only look up the issue
    number and convert the conference id to a string; ``saveFileName`` is not
    used by them.
    """
    # Look up the issue number using the id exactly as the caller passed it.
    fetched_issue = get_issueNumber(conferenceID)
    issueNumber = str(fetched_issue)
    # From here on the conference id is handled as a string.
    conferenceID = str(conferenceID)
def show_dialog(self, info):
    """
    Pop up dialogs for debug.

    Creates a QDialog titled 'Hint info' holding a QLabel with the text
    ``info``. A label wider than the maximum width is given a fixed
    geometry and word wrapping; the label is then offset by the padding.

    NOTE(review): nothing in the visible statements shows or executes the
    dialog.
    """
    margin = 20
    width_limit = 360  # the maximum label width before wrapping kicks in

    dialog = QDialog()
    dialog.setWindowTitle('Hint info')
    #dialog.setWindowModality(PyQt6.QtCore.Qt.NonModal)

    label = QLabel(info, dialog)
    label.adjustSize()

    natural_width = label.size().width()
    if natural_width > width_limit:
        # Too wide for one line: clamp the width and let the text wrap.
        label.setGeometry(QRect(0, 0, width_limit, 80))
        label.setWordWrap(True)
    label.move(margin, margin)
def start_collect_paper(self):
    """
    Start a crawl unless one is already in progress.

    Returns immediately when ``self.thread`` is running. Otherwise disables
    the start button, updates its tooltip, starts ``self.thread`` and emits
    ``_start_spider`` with the conference-id text, the save-file text and the
    module-level ``logger``.
    """
    global logger

    worker = self.thread
    if worker.isRunning():
        return

    button = self.startCrawling_button
    button.setEnabled(False)
    button.setToolTip('I\'m trying very hard to collect papers >_<')

    # Start the QThread worker thread first.
    worker.start()

    # Emit the signal to launch the thread's handler function.
    # It must not be called directly: the handler would then run in the same
    # thread as the main thread, and the main window would still be unusable.
    conference_text = self.conferenceID_edit.text()
    save_file_text = self.saveFile_edit.text()
    self._start_spider.emit(conference_text, save_file_text, logger)
def finish_collect_paper(self):
    """
    Restore the UI after a crawl.

    Re-enables the start button, puts back its idle tooltip and calls
    ``quit()`` on ``self.thread``.
    """
    button = self.startCrawling_button
    button.setEnabled(True)
    button.setToolTip('Click and wait for collecting published paper data ^o^')
    self.thread.quit()
# Get the issueNumber
def get_issueNumber(conferenceID, timeout=30):
    """
    Get the issueNumber from the website.

    Requests the IEEE Xplore publication-metadata REST endpoint for the given
    publication id and reads ``currentIssue -> issueNumber`` from the JSON
    reply.

    Args:
        conferenceID: Publication/conference id. It is converted with
            ``str()`` before being placed in the URL and the Referer header.
        timeout: Seconds to wait for the server, forwarded to
            ``requests.get``. Without a timeout a stalled connection would
            block the calling thread indefinitely.

    Returns:
        The issue number as a string.

    Raises:
        requests.exceptions.Timeout: The server did not answer in time.
        json.JSONDecodeError: The response body is not valid JSON.
        KeyError: The reply has no ``currentIssue`` / ``issueNumber`` entry.
    """
    conferenceID = str(conferenceID)
    gheaders = {
        'Referer': f'https://ieeexplore.ieee.org/xpl/conhome/{conferenceID}/proceeding',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.97 Safari/537.36',
    }
    md_url = f'https://ieeexplore.ieee.org/rest/publication/home/metadata?pubid={conferenceID}'
    # Bound the wait so a hung connection surfaces as an exception instead of
    # freezing the caller forever.
    md_res = requests.get(md_url, headers=gheaders, timeout=timeout)
    md_dic = json.loads(md_res.text)
    return str(md_dic['currentIssue']['issueNumber'])
# Crawl the papers and their download links
def get_article_info(conferenceID, saveFileName):
    """
    Collect the published paper data, and save into the csv file "saveFileName".

    NOTE(review): the statements visible in this block only look up the issue
    number and convert the conference id to a string; ``saveFileName`` is not
    used by them.
    """
    # Look up the issue number using the id exactly as the caller passed it.
    fetched_issue = get_issueNumber(conferenceID)
    issueNumber = str(fetched_issue)
    # From here on the conference id is handled as a string.
    conferenceID = str(conferenceID)
def show_dialog(self, info):
    """
    Pop up dialogs for debug.

    Creates a QDialog titled 'Hint info' holding a QLabel with the text
    ``info``. A label wider than the maximum width is given a fixed
    geometry and word wrapping; the label is then offset by the padding.

    NOTE(review): nothing in the visible statements shows or executes the
    dialog.
    """
    margin = 20
    width_limit = 360  # the maximum label width before wrapping kicks in

    dialog = QDialog()
    dialog.setWindowTitle('Hint info')
    #dialog.setWindowModality(PyQt6.QtCore.Qt.NonModal)

    label = QLabel(info, dialog)
    label.adjustSize()

    natural_width = label.size().width()
    if natural_width > width_limit:
        # Too wide for one line: clamp the width and let the text wrap.
        label.setGeometry(QRect(0, 0, width_limit, 80))
        label.setWordWrap(True)
    label.move(margin, margin)
def start_collect_paper(self):
    """
    Start a crawl unless one is already in progress.

    Returns immediately when ``self.thread`` is running. Otherwise disables
    the start button, updates its tooltip, starts ``self.thread`` and emits
    ``_start_spider`` with the conference-id text, the save-file text and the
    module-level ``logger``.
    """
    global logger

    worker = self.thread
    if worker.isRunning():
        return

    button = self.startCrawling_button
    button.setEnabled(False)
    button.setToolTip('I\'m trying very hard to collect papers >_<')

    # Start the QThread worker thread first.
    worker.start()

    # Emit the signal to launch the thread's handler function.
    # It must not be called directly: the handler would then run in the same
    # thread as the main thread, and the main window would still be unusable.
    conference_text = self.conferenceID_edit.text()
    save_file_text = self.saveFile_edit.text()
    self._start_spider.emit(conference_text, save_file_text, logger)
def finish_collect_paper(self):
    """
    Restore the UI after a crawl.

    Re-enables the start button, puts back its idle tooltip and calls
    ``quit()`` on ``self.thread``.
    """
    button = self.startCrawling_button
    button.setEnabled(True)
    button.setToolTip('Click and wait for collecting published paper data ^o^')
    self.thread.quit()