-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathcreate_input_jsonl.py
More file actions
56 lines (47 loc) · 1.68 KB
/
create_input_jsonl.py
File metadata and controls
56 lines (47 loc) · 1.68 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
import json
import os
from google.cloud import storage
from dotenv import load_dotenv
from models import TestEntry
load_dotenv()

# Read configuration from the environment.
BUCKET_NAME = os.environ.get("BUCKET_NAME")
INPUT_FOLDER = os.environ.get("INPUT_FOLDER")
INPUT_JSONL_NAME = os.environ.get("INPUT_JSONL_NAME")
PROMPT = os.environ.get("PROMPT")

# Fail fast on missing/empty settings: otherwise the script would only crash
# after the output file has already been truncated, or would silently write
# requests with a null prompt.
_missing_vars = [
    name
    for name, value in (
        ("BUCKET_NAME", BUCKET_NAME),
        ("INPUT_FOLDER", INPUT_FOLDER),
        ("INPUT_JSONL_NAME", INPUT_JSONL_NAME),
        ("PROMPT", PROMPT),
    )
    if not value
]
if _missing_vars:
    raise SystemExit(
        "Missing required environment variables: " + ", ".join(_missing_vars)
    )

# Normalize the folder to exactly one trailing slash so that
#   * listing "inputs" does not also match sibling prefixes like "inputs_old/",
#   * an INPUT_FOLDER that already ends in "/" is still stripped from the keys.
folder_prefix = INPUT_FOLDER.rstrip("/") + "/"

# The response schema is identical for every request; build it once, and
# before the output file is opened so a schema error cannot leave a
# truncated file behind.
response_schema = TestEntry.model_json_schema()

# Initialize the client and get the bucket.
client = storage.Client()
bucket = client.get_bucket(BUCKET_NAME)

# List every object stored under the input folder.
blobs = bucket.list_blobs(prefix=folder_prefix)

# Generate the input JSONL file: one request object per line.
with open(INPUT_JSONL_NAME, "w", encoding="utf-8") as f:
    for blob in blobs:
        # Use the path relative to the input folder as the unique 'key'.
        # Only the leading prefix is removed (str.replace would also rewrite
        # later occurrences of the folder name inside nested paths).
        filename = blob.name[len(folder_prefix):]

        # Skip folder placeholder objects: the input folder itself (empty
        # key) and any sub-folder marker (name ending in "/").
        if not filename or filename.endswith("/"):
            continue

        # Construct the request for this file.
        request = {
            "key": filename,
            "request": {
                "contents": [
                    {
                        "role": "user",
                        "parts": [
                            {"text": PROMPT},
                            {
                                "file_data": {
                                    "mime_type": "text/plain",
                                    "file_uri": f"gs://{BUCKET_NAME}/{blob.name}",
                                }
                            },
                        ],
                    }
                ],
                "generation_config": {
                    "temperature": 0.0,
                    "response_mime_type": "application/json",
                    "response_schema": response_schema,
                },
            },
        }

        # Write the request as a single JSON object on its own line.
        f.write(json.dumps(request) + "\n")

# Report success only after the file has been closed (and therefore flushed).
print(f"✅ Successfully Created {INPUT_JSONL_NAME} locally.")