Conversation
| while line != '' and len(all_matches) < max_matches: | ||
| data = line.strip().split(',') | ||
| if re.search(rf'\b{spanish_word}\b', data[0], re.IGNORECASE): | ||
| if re.search(rf'\b{re.escape(spanish_word)}\b', data[0], re.IGNORECASE): |
There was a problem hiding this comment.
Así lo que está en spanish_word no se toma como regex
| inputs[j] += list(output.outputs[0].token_ids) | ||
| responses[j] += tokenizer.eos_token | ||
| inputs[j] += [tokenizer.eos_token_id] | ||
| mask[j] += [1] |
There was a problem hiding this comment.
Cuando se supera el límite de tokens de generación, no se concatenan los tokens para no gastar recursos de computo en más tokens que igual no llevan a ninguna recompensa.
|
|
||
| def extract_answer(response, transform_fn = lambda x: x, nan_val = None)->str|None: | ||
| ans = re.match('.*?<answer>(.*?)</answer>', response, re.DOTALL|re.MULTILINE) | ||
| ans = re.match('.*?<answer>(.*?)</answer>\s*$', response, re.DOTALL|re.MULTILINE) |
There was a problem hiding this comment.
Para obtener la recompensa debe terminar con los tags de answer
| logger.debug(f'Rewards: {rewards[torch.arange(len(samples)), eos_index]}') | ||
|
|
||
| return rewards | ||
|
|
| rewards = get_rewards_translation(inputs, is_terminal, wayuu_text) | ||
| return inputs, rewards, is_terminal, complete_prompts, prompt_length, mask | ||
|
|
||
|
|
There was a problem hiding this comment.
Adaptación para generar un episodio. Generar x simulaciones a partir de una traducción en español y evaluarlas con el BLEU
| wayuu = self.wayuu_lines[idx] | ||
|
|
||
| return spa, wayuu | ||
| # %% [markdown] |
There was a problem hiding this comment.
Leer el dataset de traducción. No es la manera más eficiente porque se guarda completo en RAM.
| no_kl=True | ||
| enabled_tools = ['calculator', 'spa_to_wayu'] | ||
| logger.info(f'Hyperparameters:\nupdate_epochs:{update_epochs}\nrl_steps:{rl_steps}\nsims_per_prompt:{sims_per_prompt}\nminibatch_size:{minibatch_size}\npolicy_lr:{policy_lr}\nwarmup_steps:{warmup_steps}\ngae_lambda: {gae_lambda}\nnormalize advantage:{normalize_advantage}\nlower_clip:{lower_clip}\nupper_clip:{upper_clip}\nkl_penalty_coef:{kl_penalty_coef}\ntemperature:{temperature}\ndr_grpo:{dr_grpo}\nno_kl={no_kl}\nuse_deepspeed={use_deepspeed}\nuse_vllm={use_vllm}\nenabled_tools={enabled_tools}\nbase_model_name={base_model_name}') | ||
| max_new_tokens=512 |
There was a problem hiding this comment.
Aumenté la cantidad de tokens que se pueden generar
| scheduling_policy="fcfs", | ||
| dtype=torch.bfloat16, | ||
| max_model_len=2048, | ||
| max_model_len=768, |
There was a problem hiding this comment.
No estoy seguro, pero de pronto esto reduzca el uso de memoria, igual no se requieren tantos tokens.
| acc = eval_multiplication(model_engine, tokenizer, epochs=40, batch_size=64) | ||
| else: | ||
| with model_engine.disable_adapter(): | ||
| acc = eval_multiplication(model_engine, tokenizer, epochs=40, batch_size=64) |
There was a problem hiding this comment.
Falta hacer una función de evaluación para sacar un estimado del desempeño a medida que entrenamos
No description provided.