11from sqlalchemy import select
22from sqlalchemy .ext .asyncio import AsyncSession
33
4+ from src .core .tasks .scheduled .impl .huggingface .queries .cte import HuggingfacePrereqCTEContainer
45from src .core .tasks .scheduled .impl .huggingface .queries .get .convert import convert_fine_to_coarse_record_type , \
56 convert_validated_type_to_relevant
67from src .core .tasks .scheduled .impl .huggingface .queries .get .model import GetForLoadingToHuggingFaceOutput
@@ -23,21 +24,26 @@ def __init__(self, page: int):
2324
2425
2526 async def run (self , session : AsyncSession ) -> list [GetForLoadingToHuggingFaceOutput ]:
26- label_url_id = 'url_id'
2727 label_url = 'url'
2828 label_record_type_fine = 'record_type_fine'
2929 label_html = 'html'
3030 label_type = 'type'
3131
3232
33+ cte = HuggingfacePrereqCTEContainer ()
34+
3335 query = (
3436 select (
35- URL . id . label ( label_url_id ) ,
37+ cte . url_id ,
3638 URL .full_url .label (label_url ),
3739 URLRecordType .record_type .label (label_record_type_fine ),
3840 URLCompressedHTML .compressed_html .label (label_html ),
3941 FlagURLValidated .type .label (label_type )
4042 )
43+ .join (
44+ URL ,
45+ cte .url_id == URL .id
46+ )
4147 .join (
4248 URLRecordType ,
4349 URL .id == URLRecordType .url_id
@@ -65,7 +71,7 @@ async def run(self, session: AsyncSession) -> list[GetForLoadingToHuggingFaceOut
6571 final_results = []
6672 for result in db_results :
6773 output = GetForLoadingToHuggingFaceOutput (
68- url_id = result [label_url_id ],
74+ url_id = result [cte . url_id ],
6975 url = result [label_url ],
7076 relevant = convert_validated_type_to_relevant (
7177 URLType (result [label_type ])
0 commit comments