answer_question_bot/chat.py at main · smith-source/answer_question_bot · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
from transformers import AutoModel, AutoTokenizer

# global cut_img

# Loaded (tokenizer, model) pairs keyed by checkpoint path. Loading the
# checkpoint and moving it to the GPU is expensive, so it is done once per
# process and reused by every subsequent read_img() call.
_GOT_OCR_CACHE = {}


def _load_got_ocr(model_path):
    """Return the (tokenizer, model) pair for *model_path*, loading it on first use."""
    if model_path not in _GOT_OCR_CACHE:
        # NOTE: trust_remote_code=True executes Python code shipped with the
        # checkpoint; only point model_path at a checkpoint you trust.
        tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
        model = AutoModel.from_pretrained(
            model_path,
            trust_remote_code=True,
            low_cpu_mem_usage=True,
            device_map='cuda',
            use_safetensors=True,
            pad_token_id=tokenizer.eos_token_id,
        )
        # Inference only: switch to eval mode and keep the model on the GPU.
        _GOT_OCR_CACHE[model_path] = (tokenizer, model.eval().cuda())
    return _GOT_OCR_CACHE[model_path]


def read_img(img, model_path='/GOT-OCR2_0'):
    """Run plain-text OCR on *img* and return the model's result.

    Args:
        img: The image to read. It is forwarded to ``model.chat`` with
            ``gradio_input=True`` (presumably an in-memory image coming from
            a Gradio component rather than a file path -- TODO confirm).
        model_path: Directory of the checkpoint to load. Defaults to
            ``'/GOT-OCR2_0'``, the path this function always used before.

    Returns:
        Whatever ``model.chat`` returns for ``ocr_type='ocr'``; the value is
        also printed to stdout.

    The tokenizer and model are loaded on the first call for a given
    *model_path* and reused afterwards. A CUDA device is required.
    """
    tokenizer, model = _load_got_ocr(model_path)

    # Plain-text OCR. Per the earlier revision of this function, the model
    # also accepts ocr_type='format', ocr_box/ocr_color for fine-grained OCR,
    # chat_crop() for multi-crop OCR and render=True to save rendered output.
    res = model.chat(tokenizer, img, ocr_type='ocr', gradio_input=True)

    print(res)
    return res