-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathscript.py
More file actions
27 lines (24 loc) · 988 Bytes
/
Copy pathscript.py
File metadata and controls
27 lines (24 loc) · 988 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
import arxiv, os, json, datetime as dt
from db import insert_paper # project-local wrapper function that writes a paper to PG (presumably PostgreSQL)
from quality import quality_score # quality-scoring module
# Search queries sent to arXiv; fetch() issues one search per entry.
KEYWORDS = ['diffusion model', 'chain of thought', 'multimodal']
MAX_DAILY = 200 # fetch at most 200 papers per day
def fetch():
    """Fetch arXiv papers for every keyword, score them, and store them.

    One search is issued per entry in ``KEYWORDS``, ordered by submission
    date, with the ``MAX_DAILY`` budget split evenly across keywords
    (integer division, so the total may be slightly below ``MAX_DAILY``).
    Each result is turned into a metadata dict, scored with
    ``quality_score`` and passed to ``insert_paper``.

    The same paper can be returned for more than one keyword; it is then
    passed to ``insert_paper`` once per matching keyword, each time with a
    different ``'kw'`` value.

    Returns:
        None.
    """
    if not KEYWORDS:
        # Nothing to search for; also avoids dividing by zero below.
        return

    per_keyword = MAX_DAILY // len(KEYWORDS)

    # Search.results() is deprecated since arxiv 2.0 in favour of
    # Client.results(search). A single client is reused for all keywords
    # so its request throttling applies across the whole run.
    client = arxiv.Client()

    for kw in KEYWORDS:
        search = arxiv.Search(
            query=kw,
            max_results=per_keyword,
            sort_by=arxiv.SortCriterion.SubmittedDate,
        )
        for r in client.results(search):
            meta = {
                # Last path segment of the entry URL, kept exactly as
                # returned (any version suffix is not stripped).
                'arxivid': r.entry_id.split('/')[-1],
                'title': r.title,
                'abstract': r.summary,
                'authors': [a.name for a in r.authors],
                'pdf': r.pdf_url,
                'submitted': r.published,
                'kw': kw,
            }
            # The score is computed from the metadata gathered above and
            # stored alongside it before the record is written.
            meta['quality'] = quality_score(meta)
            insert_paper(meta)
# Run the fetch job only when executed as a script, not when imported.
if __name__ == '__main__':
    fetch()