From bed188f154062af3c03cd14c76e0fb1d6848ec77 Mon Sep 17 00:00:00 2001
From: Charlie Lindsay
Date: Thu, 15 Aug 2024 18:13:41 +0200
Subject: [PATCH] Added descriptions to tops of files

---
 app/data/RiskAssessment.py                    |   2 +
 app/data/example_risk_assessments.py          |   4 +-
 app/evaluation.py                             | 113 ------------------
 app/prompts/BasePromptInput.py                |   2 +
 app/prompts/ControlMeasureClassification.py   |   2 +
 app/prompts/HarmCausedAndHazardEvent.py       |   2 +
 app/prompts/HowItHarmsInContext.py            |   2 +
 app/prompts/NoInformationProvided.py          |   2 +
 .../SummarizeControlMeasureFeedback.py        |   2 +
 app/prompts/WhoItHarmsInContext.py            |   2 +
 app/test_classes/BaseTestClass.py             |   2 +
 .../TestBothPreventionAndMitigationInput.py   |   2 +
 .../TestControlMeasureClassificationPrompt.py |   2 +
 app/test_classes/TestModelAccuracy.py         |   2 +
 ...estModelAccuracyForCombinationOfPrompts.py |   2 +
 app/test_classes/TestPromptOnSingleExample.py |   2 +
 ...isk_domain_test_for_how_it_harms_prompt.py |   3 +-
 ...isk_domain_test_for_who_it_harms_prompt.py |   4 +-
 ..._control_measure_classification_prompts.py |   4 +-
 ...prompts_without_context_of_other_inputs.py | 113 ------------------
 app/test_scripts/test_latency.py              |   4 +-
 .../test_no_information_provided.py           |   4 +-
 ...mmarize_control_measure_feedback_prompt.py |   2 +-
 app/test_utils/ExamplesGenerator.py           |   2 +
 app/test_utils/InputAndExpectedOutput.py      |   2 +
 app/utils/GoogleSheetsWriter.py               |   4 +-
 app/utils/LLMCaller.py                        |   2 +
 app/utils/RegexPatternMatcher.py              |   2 +
 28 files changed, 54 insertions(+), 237 deletions(-)
 delete mode 100644 app/test_scripts/test_control_measure_classification_prompts_without_context_of_other_inputs.py

diff --git a/app/data/RiskAssessment.py b/app/data/RiskAssessment.py
index 9c6a47d..7b96cc3 100644
--- a/app/data/RiskAssessment.py
+++ b/app/data/RiskAssessment.py
@@ -1,3 +1,5 @@
+# Class used to create risk assessment examples, with methods that build example-specific LLM prompts from LLM prompt templates.
+
 from typing import Type
 from ..utils.LLMCaller import *
 
diff --git a/app/data/example_risk_assessments.py b/app/data/example_risk_assessments.py
index 0e6cd8a..5f046d1 100644
--- a/app/data/example_risk_assessments.py
+++ b/app/data/example_risk_assessments.py
@@ -1,6 +1,4 @@
-# Learnings:
-# 1. Keeping a safe distance away from a possible projectile is a prevention measure.
-# The hazard event is therefore the projectile hitting someone, not the projectile being released.
+# Risk Assessments used to test the accuracy of LLM prompts.
 
 import numpy as np
 
diff --git a/app/evaluation.py b/app/evaluation.py
index 26fb66f..86b5f1b 100755
--- a/app/evaluation.py
+++ b/app/evaluation.py
@@ -26,7 +26,6 @@ class Params(TypedDict):
     is_feedback_text: bool
     is_risk_matrix: bool
     is_risk_assessment: bool
-    are_all_input_fields_entered_manually: bool
     LLM: str
 
 def provide_feedback_on_risk_matrix(response):
@@ -237,7 +236,6 @@ def evaluation_function(response: Any, answer: Any, params: Params) -> Result:
     LLM_name = params["LLM"]
     LLM = LLM_dictionary[LLM_name]
 
-    if params['are_all_input_fields_entered_manually'] == True:
     activity, hazard, how_it_harms, who_it_harms, uncontrolled_likelihood, uncontrolled_severity, uncontrolled_risk, prevention, mitigation, controlled_likelihood, controlled_severity, controlled_risk = np.array(response).flatten()
 
     RA = RiskAssessment(activity=activity, hazard=hazard, who_it_harms=who_it_harms, how_it_harms=how_it_harms,
@@ -393,115 +391,4 @@ def evaluation_function(response: Any, answer: Any, params: Params) -> Result:
     {feedback_for_correct_answers} \n\n\n\n\n
     {no_information_provided_message}'''
 
     return Result(is_correct=is_everything_correct, feedback=feedback)
-
-    if params['are_all_input_fields_entered_manually'] == False:
-
-        prevention, mitigation = np.array(response).flatten()
-
-        activity = 'Heat transfer lab'
-        hazard = 'Boiling hot water'
-        who_it_harms = 'Students'
-        how_it_harms = 'Burns'
-
-        hazard_event = 'Boiling hot water split on student'
-        harm_caused = 'Burns'
-
-        RA = RiskAssessment(activity=activity, hazard=hazard, who_it_harms=who_it_harms, how_it_harms=how_it_harms,
-            uncontrolled_likelihood=1, uncontrolled_severity=1,
-            uncontrolled_risk=1, prevention=prevention, mitigation=mitigation,
-            controlled_likelihood=1, controlled_severity=1, controlled_risk=1,
-            prevention_prompt_expected_class='prevention', mitigation_prompt_expected_class='mitigation', risk_domain='')
-
-        input_check_feedback_message = RA.get_input_check_feedback_message()
-
-        if input_check_feedback_message != True:
-            return Result(is_correct=False,
-                feedback=f'''\n\n\n\n\n # Feedback:\n\n\n\n\n
-                \n\n\n\n\n## {input_check_feedback_message}\n\n\n\n\n''')
-
-        feedback_for_incorrect_answers = '\n\n\n\n# Feedback for Incorrect Answers\n\n\n\n'
-        feedback_for_correct_answers = '\n\n\n\n# Feedback for Correct Answers\n\n\n\n'
-
-        fields_for_which_no_information_provided = []
-
-        is_everything_correct = True
-
-        # PREVENTION CHECKS
-        no_information_provided_for_prevention_prompt_input = RA.get_no_information_provided_for_prevention_input()
-        no_information_provided_for_prevention_prompt_output, no_information_provided_for_prevention_pattern = RA.get_prompt_output_and_pattern_matched(prompt_input_object=no_information_provided_for_prevention_prompt_input, LLM_caller=LLM)
-
-        if no_information_provided_for_prevention_pattern == 'no information provided' or RA.prevention == '':
-            fields_for_which_no_information_provided.append('Prevention')
-
-        else:
-
-            control_measure_prompt_with_prevention_input = RA.get_control_measure_prompt_with_prevention_input()
-            control_measure_prompt_with_prevention_output, control_measure_prompt_with_prevention_pattern = RA.get_prompt_output_and_pattern_matched(prompt_input_object=control_measure_prompt_with_prevention_input,
-                LLM_caller=LLM,
-                harm_caused=harm_caused,
-                hazard_event=hazard_event)
-
-            feedback_for_correct_answers, feedback_for_incorrect_answers, is_everything_correct = provide_feedback_on_control_measure_input(
-                control_measure_input_field='prevention',
-                control_measure_prompt_input=control_measure_prompt_with_prevention_input,
-                control_measure_prompt_output=control_measure_prompt_with_prevention_output,
-                control_measure_prompt_pattern=control_measure_prompt_with_prevention_pattern,
-                feedback_for_correct_answers=feedback_for_correct_answers,
-                feedback_for_incorrect_answers=feedback_for_incorrect_answers,
-                is_everything_correct=is_everything_correct,
-                risk_assessment=RA,
-                LLM_caller=LLM
-            )
-
-        # MITIGATION CHECKS
-        no_information_provided_for_mitigation_prompt_input = RA.get_no_information_provided_for_mitigation_input()
-        no_information_provided_for_mitigation_prompt_output, no_information_provided_for_mitigation_pattern = RA.get_prompt_output_and_pattern_matched(prompt_input_object=no_information_provided_for_mitigation_prompt_input, LLM_caller=LLM)
-
-        if no_information_provided_for_mitigation_pattern == 'no information provided' or RA.mitigation == '':
-            fields_for_which_no_information_provided.append('Mitigation')
-        else:
-
-            control_measure_prompt_with_mitigation_input = RA.get_control_measure_prompt_with_mitigation_input()
-            control_measure_prompt_with_mitigation_output, control_measure_prompt_with_mitigation_pattern = RA.get_prompt_output_and_pattern_matched(prompt_input_object=control_measure_prompt_with_mitigation_input,
-                LLM_caller=LLM,
-                harm_caused=harm_caused,
-                hazard_event=hazard_event)
-
-            feedback_for_correct_answers, feedback_for_incorrect_answers, is_everything_correct = provide_feedback_on_control_measure_input(
-                control_measure_input_field='mitigation',
-                control_measure_prompt_input=control_measure_prompt_with_mitigation_input,
-                control_measure_prompt_output=control_measure_prompt_with_mitigation_output,
-                control_measure_prompt_pattern=control_measure_prompt_with_mitigation_pattern,
-                feedback_for_correct_answers=feedback_for_correct_answers,
-                feedback_for_incorrect_answers=feedback_for_incorrect_answers,
-                is_everything_correct=is_everything_correct,
-                risk_assessment=RA,
-                LLM_caller=LLM
-            )
-
-        if is_everything_correct == True:
-            feedback_for_incorrect_answers = '# Congratulations! All your answers are correct!'
-
-        if fields_for_which_no_information_provided == []:
-            no_information_provided_message = ''
-        else:
-            no_information_provided_message = f'\n\n\n\n\n## Fields for which no information is provided and hence no feedback given: {", ".join(fields_for_which_no_information_provided)}\n\n\n\n\n'
-
-        if fields_for_which_no_information_provided != ['Prevention', 'Mitigation']:
-            hazard_event_and_harm_caused_inferred_message = f'''## The following were inferred from your answers: \n\n\n\n\n
-            \n\n\n\n\n### Event that leads to harm: "{hazard_event}"\n\n\n\n\n
-            \n\n\n\n\n### Harm caused to '{RA.who_it_harms}': "{harm_caused}".\n\n\n\n
-            \n\n\n\n\n### If they are incorrect, please make these more explicit in the "Hazard" and "How it harms" fields.\n\n\n\n\n'''
-        else:
-            hazard_event_and_harm_caused_inferred_message = ''
-
-        feedback_for_correct_answers += f'''
-        \n\n\n\n### There are no errors in your likelihood, severity, and risk values.\n\n\n\n'''
-
-        feedback=f'''{hazard_event_and_harm_caused_inferred_message} \n\n\n\n\n
-        {feedback_for_incorrect_answers} \n\n\n\n\n
-        {feedback_for_correct_answers} \n\n\n\n\n
-        {no_information_provided_message}'''
-
-        return Result(is_correct=is_everything_correct, feedback=feedback)
\ No newline at end of file
diff --git a/app/prompts/BasePromptInput.py b/app/prompts/BasePromptInput.py
index 919181f..78e7cfd 100644
--- a/app/prompts/BasePromptInput.py
+++ b/app/prompts/BasePromptInput.py
@@ -1,3 +1,5 @@
+# Base class that other PromptInput classes inherit from.
+
 try:
     from utils.RegexPatternMatcher import RegexPatternMatcher
 except:
diff --git a/app/prompts/ControlMeasureClassification.py b/app/prompts/ControlMeasureClassification.py
index 00a09d3..dcbaf3d 100644
--- a/app/prompts/ControlMeasureClassification.py
+++ b/app/prompts/ControlMeasureClassification.py
@@ -1,3 +1,5 @@
+# PromptInput class used to classify a control measure as a prevention measure, a mitigation measure, both, or neither. This prompt takes the "event that leads to harm" and the "harm caused" as input.
+
 from ..prompts.BasePromptInput import BasePromptInput
 from ..utils.RegexPatternMatcher import RegexPatternMatcher
 
diff --git a/app/prompts/HarmCausedAndHazardEvent.py b/app/prompts/HarmCausedAndHazardEvent.py
index 8f97ffa..d85104f 100644
--- a/app/prompts/HarmCausedAndHazardEvent.py
+++ b/app/prompts/HarmCausedAndHazardEvent.py
@@ -1,3 +1,5 @@
+# PromptInput class used to infer the "event that leads to harm" and the "harm caused" from the student's risk assessment inputs.
+
 from .BasePromptInput import BasePromptInput
 
 class HarmCausedAndHazardEvent(BasePromptInput):
diff --git a/app/prompts/HowItHarmsInContext.py b/app/prompts/HowItHarmsInContext.py
index 25b6de6..2d9012d 100644
--- a/app/prompts/HowItHarmsInContext.py
+++ b/app/prompts/HowItHarmsInContext.py
@@ -1,3 +1,5 @@
+# PromptInput class that checks whether the "How it harms" input matches the "activity" and "hazard" inputs.
+
 from ..prompts.BasePromptInput import BasePromptInput
 from ..utils.RegexPatternMatcher import RegexPatternMatcher
 
diff --git a/app/prompts/NoInformationProvided.py b/app/prompts/NoInformationProvided.py
index 976fdcd..c023f7d 100644
--- a/app/prompts/NoInformationProvided.py
+++ b/app/prompts/NoInformationProvided.py
@@ -1,3 +1,5 @@
+# PromptInput class that checks whether no information is provided in the "prevention" or "mitigation" input fields.
+
 from .BasePromptInput import BasePromptInput
 
 class NoInformationProvided(BasePromptInput):
diff --git a/app/prompts/SummarizeControlMeasureFeedback.py b/app/prompts/SummarizeControlMeasureFeedback.py
index 736b5cc..e63188b 100644
--- a/app/prompts/SummarizeControlMeasureFeedback.py
+++ b/app/prompts/SummarizeControlMeasureFeedback.py
@@ -1,3 +1,5 @@
+# PromptInput class that takes the output of the ControlMeasureClassification prompt and shortens it to three sentences.
+
 from ..prompts.BasePromptInput import BasePromptInput
 
 class SummarizeControlMeasureFeedback(BasePromptInput):
diff --git a/app/prompts/WhoItHarmsInContext.py b/app/prompts/WhoItHarmsInContext.py
index b2a17a6..7034a7d 100644
--- a/app/prompts/WhoItHarmsInContext.py
+++ b/app/prompts/WhoItHarmsInContext.py
@@ -1,3 +1,5 @@
+# PromptInput class that checks whether the "Who it harms" input matches the "activity", "hazard" and "how it harms" inputs.
+
 from ..prompts.BasePromptInput import BasePromptInput
 from ..utils.RegexPatternMatcher import RegexPatternMatcher
 
diff --git a/app/test_classes/BaseTestClass.py b/app/test_classes/BaseTestClass.py
index 1e236ee..7d3eb92 100644
--- a/app/test_classes/BaseTestClass.py
+++ b/app/test_classes/BaseTestClass.py
@@ -1,3 +1,5 @@
+# Base class used to test the accuracy of different prompts.
+
 from ..utils.LLMCaller import LLMCaller
 from ..utils.RegexPatternMatcher import RegexPatternMatcher
 
diff --git a/app/test_classes/TestBothPreventionAndMitigationInput.py b/app/test_classes/TestBothPreventionAndMitigationInput.py
index ff4c75b..7f0882a 100644
--- a/app/test_classes/TestBothPreventionAndMitigationInput.py
+++ b/app/test_classes/TestBothPreventionAndMitigationInput.py
@@ -1,3 +1,5 @@
+# Builds on TestControlMeasureClassificationPrompt.py to test the percentage of cases in which both the prevention and mitigation inputs are classified correctly.
+
 from ..test_classes.TestControlMeasureClassificationPrompt import TestControlMeasureClassificationPrompt
 from ..utils.LLMCaller import LLMCaller
 import numpy as np
diff --git a/app/test_classes/TestControlMeasureClassificationPrompt.py b/app/test_classes/TestControlMeasureClassificationPrompt.py
index 828f161..37cf5a7 100644
--- a/app/test_classes/TestControlMeasureClassificationPrompt.py
+++ b/app/test_classes/TestControlMeasureClassificationPrompt.py
@@ -1,3 +1,5 @@
+# Builds on TestModelAccuracyForCombinationOfPrompts.py to test the accuracy of control measure classification.
+
 from ..test_classes.TestModelAccuracyForCombinationOfPrompts import TestModelAccuracyForCombinationOfPrompts
 from ..utils.LLMCaller import LLMCaller
 
diff --git a/app/test_classes/TestModelAccuracy.py b/app/test_classes/TestModelAccuracy.py
index 2a8ce9b..470b2df 100644
--- a/app/test_classes/TestModelAccuracy.py
+++ b/app/test_classes/TestModelAccuracy.py
@@ -1,3 +1,5 @@
+# Builds on BaseTestClass to allow testing of multiple risk assessment examples (from data/example_risk_assessments.py).
+
 from ..test_classes.BaseTestClass import BaseTestClass
 from ..utils.LLMCaller import LLMCaller
 import pandas as pd
diff --git a/app/test_classes/TestModelAccuracyForCombinationOfPrompts.py b/app/test_classes/TestModelAccuracyForCombinationOfPrompts.py
index c693d7c..f6c5663 100644
--- a/app/test_classes/TestModelAccuracyForCombinationOfPrompts.py
+++ b/app/test_classes/TestModelAccuracyForCombinationOfPrompts.py
@@ -1,3 +1,5 @@
+# Builds on the TestModelAccuracy class to test the accuracy of multiple prompts used in sequence, e.g. the HarmCausedAndHazardEvent and ControlMeasureClassification prompts.
+
 from ..test_classes.TestModelAccuracy import TestModelAccuracy
 from ..utils.LLMCaller import LLMCaller
 
diff --git a/app/test_classes/TestPromptOnSingleExample.py b/app/test_classes/TestPromptOnSingleExample.py
index 261696c..348a479 100644
--- a/app/test_classes/TestPromptOnSingleExample.py
+++ b/app/test_classes/TestPromptOnSingleExample.py
@@ -1,3 +1,5 @@
+# Builds on BaseTestClass.py to test a prompt on a single risk assessment example (used in unit tests).
+
 from BaseTestClass import BaseTestClass
 from ..utils.LLMCaller import LLMCaller
 
diff --git a/app/test_scripts/risk_domain_test_for_how_it_harms_prompt.py b/app/test_scripts/risk_domain_test_for_how_it_harms_prompt.py
index d9a9462..acb20b1 100644
--- a/app/test_scripts/risk_domain_test_for_how_it_harms_prompt.py
+++ b/app/test_scripts/risk_domain_test_for_how_it_harms_prompt.py
@@ -1,4 +1,5 @@
-# python -m app.test_scripts.risk_domain_test_for_how_it_harms_prompt
+# Script that tests whether the "how it harms" input is from the same risk domain as the "activity" and "hazard" inputs.
+# To run, enter in terminal: python -m app.test_scripts.risk_domain_test_for_how_it_harms_prompt
 
 from ..test_classes.TestModelAccuracy import TestModelAccuracy
 from ..test_utils.ExamplesGenerator import ExamplesGeneratorFromCorrectExamples
diff --git a/app/test_scripts/risk_domain_test_for_who_it_harms_prompt.py b/app/test_scripts/risk_domain_test_for_who_it_harms_prompt.py
index fc48413..4d9a07b 100644
--- a/app/test_scripts/risk_domain_test_for_who_it_harms_prompt.py
+++ b/app/test_scripts/risk_domain_test_for_who_it_harms_prompt.py
@@ -1,4 +1,6 @@
-# python -m app.test_scripts.risk_domain_test_for_who_it_harms_prompt
+# Script that tests whether the "who it harms" input is from the same risk domain as the "activity" and "hazard" inputs.
+
+# To run, enter in terminal: python -m app.test_scripts.risk_domain_test_for_who_it_harms_prompt
 
 from ..test_classes.TestModelAccuracy import TestModelAccuracy
 from ..test_utils.ExamplesGenerator import ExamplesGeneratorFromCorrectExamples
diff --git a/app/test_scripts/test_control_measure_classification_prompts.py b/app/test_scripts/test_control_measure_classification_prompts.py
index c86450d..dacaf5b 100644
--- a/app/test_scripts/test_control_measure_classification_prompts.py
+++ b/app/test_scripts/test_control_measure_classification_prompts.py
@@ -1,4 +1,6 @@
-# python -m app.test_scripts.test_control_measure_classification_prompts
+# To run, enter in terminal: python -m app.test_scripts.test_control_measure_classification_prompts
+
+# Script that tests the accuracy of the control measure classification prompt and performs an ablation study on the impact of few-shot and chain-of-thought prompting on accuracy.
 
 from ..test_classes.TestBothPreventionAndMitigationInput import TestBothPreventionAndMitigationInput
 from ..test_classes.TestPreventionInput__ControlMeasureClassifiationPrompt import TestPreventionInput__ControlMeasureClassifiationPrompt
diff --git a/app/test_scripts/test_control_measure_classification_prompts_without_context_of_other_inputs.py b/app/test_scripts/test_control_measure_classification_prompts_without_context_of_other_inputs.py
deleted file mode 100644
index 1bfa364..0000000
--- a/app/test_scripts/test_control_measure_classification_prompts_without_context_of_other_inputs.py
+++ /dev/null
@@ -1,113 +0,0 @@
-# python -m app.test_scripts.test_control_measure_classification_prompts_without_context_of_other_inputs
-
-from ..test_classes.TestModelAccuracy import TestModelAccuracy
-from ..utils.LLMCaller import *
-from ..test_utils.ExamplesGenerator import RiskAssessmentExamplesGeneratorForSinglePrompt
-from ..data.example_risk_assessments import *
-
-def test_control_measure_classification_prompt(risk_assessments_dict,
-                                               LLM,
-                                               ground_truth_parameter,
-                                               method_to_get_prompt_input,
-                                               sheet_name,
-                                               is_first_test=False):
-
-    examples = RiskAssessmentExamplesGeneratorForSinglePrompt(
-        risk_assessments=risk_assessments_dict['risk_assessments'],
-        ground_truth_parameter=ground_truth_parameter,
-        method_to_get_prompt_input=method_to_get_prompt_input
-    )
-
-    test = TestModelAccuracy(
-        LLM=LLM,
-        list_of_input_and_expected_outputs=examples.get_input_and_expected_output_list(),
-        sheet_name=sheet_name,
-        examples_gathered_or_generated_message='Risk Assessments gathered from students',
-        domain=risk_assessments_dict['risk_domain'],
-        is_first_test=is_first_test
-    )
-
-    test.run_test()
-
-def test_prevention_classification_prompt(risk_assessments_dict, LLM, is_first_test=False):
-
-    test_control_measure_classification_prompt(
-        risk_assessments_dict=risk_assessments_dict,
-        LLM=LLM,
-        ground_truth_parameter='prevention_classification_prompt_ground_truth',
-        method_to_get_prompt_input='get_prevention_classification_prompt_input',
-        sheet_name='Prevention Classification 2',
-        is_first_test=is_first_test
-    )
-
-def test_mitigation_classification_prompt(risk_assessments_dict, LLM, is_first_test=False):
-
-    test_control_measure_classification_prompt(
-        risk_assessments_dict=risk_assessments_dict,
-        LLM=LLM,
-        ground_truth_parameter='mitigation_classification_prompt_ground_truth',
-        method_to_get_prompt_input='get_mitigation_classification_prompt_input',
-        sheet_name='Mitigation Classification',
-        is_first_test=is_first_test
-    )
-
-if __name__ == '__main__':
-    # test_prevention_classification_prompt(
-    #     risk_assessments_dict=physical_risks_to_individuals__data_gathered_from_version_1_deployment,
-    #     LLM=MistralLarge(temperature=0.1),
-    #     is_first_test=True
-    # )
-
-    # test_prevention_classification_prompt(
-    #     risk_assessments_dict=natural_disaster_risks,
-    #     LLM=MistralLarge(temperature=0.1),
-    #     is_first_test=False
-    # )
-
-    # test_prevention_classification_prompt(
-    #     risk_assessments_dict=cybersecurity_risks,
-    #     LLM=MistralLarge(temperature=0.1),
-    #     is_first_test=False
-    # )
-
-    # test_prevention_classification_prompt(
-    #     risk_assessments_dict=terrorism_risks,
-    #     LLM=MistralLarge(temperature=0.1),
-    #     is_first_test=False
-    # )
-
-    # test_prevention_classification_prompt(
-    #     risk_assessments_dict=biohazard_risks,
-    #     LLM=MistralLarge(temperature=0.1),
-    #     is_first_test=False
-    # )
-
-    # test_mitigation_classification_prompt(
-    #     risk_assessments_dict=physical_risks_to_individuals__data_gathered_from_version_1_deployment,
-    #     LLM=MistralLarge(temperature=0.1),
-    #     is_first_test=False
-    # )
-
-    test_mitigation_classification_prompt(
-        risk_assessments_dict=natural_disaster_risks,
-        LLM=MistralLarge(temperature=0.1),
-        is_first_test=False
-    )
-
-    # test_mitigation_classification_prompt(
-    #     risk_assessments_dict=cybersecurity_risks,
-    #     LLM=MistralLarge(temperature=0.1),
-    #     is_first_test=False
-    # )
-
-    # test_mitigation_classification_prompt(
-    #     risk_assessments_dict=terrorism_risks,
-    #     LLM=MistralLarge(temperature=0.1),
-    #     is_first_test=False
-    # )
-
-    # test_mitigation_classification_prompt(
-    #     risk_assessments_dict=biohazard_risks,
-    #     LLM=MistralLarge(temperature=0.1),
-    #     is_first_test=False
-    # )
\ No newline at end of file
diff --git a/app/test_scripts/test_latency.py b/app/test_scripts/test_latency.py
index db0c042..b769f3a 100644
--- a/app/test_scripts/test_latency.py
+++ b/app/test_scripts/test_latency.py
@@ -1,4 +1,6 @@
-# python -m app.test_scripts.test_latency
+# Script which tests the latency of different LLMs.
+
+# To run, enter in terminal: python -m app.test_scripts.test_latency
 
 from ..utils.LLMCaller import *
 from ..evaluation import evaluation_function, Params
diff --git a/app/test_scripts/test_no_information_provided.py b/app/test_scripts/test_no_information_provided.py
index 71992eb..9eb0a56 100644
--- a/app/test_scripts/test_no_information_provided.py
+++ b/app/test_scripts/test_no_information_provided.py
@@ -1,4 +1,6 @@
-# python -m app.test_scripts.test_no_information_provided
+# Script which tests the accuracy of the NoInformationProvided.py prompt.
+
+# To run, enter in terminal: python -m app.test_scripts.test_no_information_provided
 
 from ..test_utils.InputAndExpectedOutput import InputAndExpectedOutputForSinglePrompt
 from ..utils.LLMCaller import *
diff --git a/app/test_scripts/test_summarize_control_measure_feedback_prompt.py b/app/test_scripts/test_summarize_control_measure_feedback_prompt.py
index 5c1aa81..84a1ed9 100644
--- a/app/test_scripts/test_summarize_control_measure_feedback_prompt.py
+++ b/app/test_scripts/test_summarize_control_measure_feedback_prompt.py
@@ -1,4 +1,4 @@
-# python -m app.test_scripts.test_summarize_control_measure_feedback_prompt
+# To run, enter in terminal: python -m app.test_scripts.test_summarize_control_measure_feedback_prompt
 
 from ..utils.LLMCaller import *
 from ..test_classes.TestSummarizeControlMeasureFeedback import TestSummarizePreventionFeedback, TestSummarizeMitigationFeedback
diff --git a/app/test_utils/ExamplesGenerator.py b/app/test_utils/ExamplesGenerator.py
index 136116d..45d2b54 100644
--- a/app/test_utils/ExamplesGenerator.py
+++ b/app/test_utils/ExamplesGenerator.py
@@ -1,3 +1,5 @@
+# Series of classes used to create InputAndExpectedOutput objects for different tests.
+
 from .InputAndExpectedOutput import InputAndExpectedOutputForSinglePrompt, InputAndExpectedOutputForCombinedPrompts
 
 class ExamplesGenerator:
diff --git a/app/test_utils/InputAndExpectedOutput.py b/app/test_utils/InputAndExpectedOutput.py
index bcd8dde..d290d56 100644
--- a/app/test_utils/InputAndExpectedOutput.py
+++ b/app/test_utils/InputAndExpectedOutput.py
@@ -1,3 +1,5 @@
+# Class which contains a prompt input object and the expected output for the prompt.
+
 try:
     from ..prompts.BasePromptInput import BasePromptInput
     from ..data.RiskAssessment import RiskAssessment
diff --git a/app/utils/GoogleSheetsWriter.py b/app/utils/GoogleSheetsWriter.py
index fc24cdb..31427c2 100644
--- a/app/utils/GoogleSheetsWriter.py
+++ b/app/utils/GoogleSheetsWriter.py
@@ -1,7 +1,7 @@
-import csv
+# Class used to write test results to Google Sheets.
+
 from google.oauth2 import service_account
 from googleapiclient.discovery import build
-from datetime import datetime
 
 class GoogleSheetsWriter:
     def __init__(self, sheet_name):
diff --git a/app/utils/LLMCaller.py b/app/utils/LLMCaller.py
index 2bde758..7b980cf 100644
--- a/app/utils/LLMCaller.py
+++ b/app/utils/LLMCaller.py
@@ -1,3 +1,5 @@
+# Series of classes built on the LLMCaller base class, used to get LLM outputs from prompt inputs.
+
 import openai
 import requests
 import anthropic
diff --git a/app/utils/RegexPatternMatcher.py b/app/utils/RegexPatternMatcher.py
index 38df661..caaa62e 100644
--- a/app/utils/RegexPatternMatcher.py
+++ b/app/utils/RegexPatternMatcher.py
@@ -1,3 +1,5 @@
+# Class used to extract classifications from LLM outputs, e.g. prevention/mitigation for the ControlMeasureClassification prompt.
+
 import re
 
 class HarmCausedAndHazardEventAndHazardEvent: