From bed188f154062af3c03cd14c76e0fb1d6848ec77 Mon Sep 17 00:00:00 2001
From: Charlie Lindsay
Date: Thu, 15 Aug 2024 18:13:41 +0200
Subject: [PATCH] Added descriptions to tops of files

---
 app/data/RiskAssessment.py                    |   2 +
 app/data/example_risk_assessments.py          |   4 +-
 app/evaluation.py                             | 113 ------------------
 app/prompts/BasePromptInput.py                |   2 +
 app/prompts/ControlMeasureClassification.py   |   2 +
 app/prompts/HarmCausedAndHazardEvent.py       |   2 +
 app/prompts/HowItHarmsInContext.py            |   2 +
 app/prompts/NoInformationProvided.py          |   2 +
 .../SummarizeControlMeasureFeedback.py        |   2 +
 app/prompts/WhoItHarmsInContext.py            |   2 +
 app/test_classes/BaseTestClass.py             |   2 +
 .../TestBothPreventionAndMitigationInput.py   |   2 +
 .../TestControlMeasureClassificationPrompt.py |   2 +
 app/test_classes/TestModelAccuracy.py         |   2 +
 ...estModelAccuracyForCombinationOfPrompts.py |   2 +
 app/test_classes/TestPromptOnSingleExample.py |   2 +
 ...isk_domain_test_for_how_it_harms_prompt.py |   3 +-
 ...isk_domain_test_for_who_it_harms_prompt.py |   4 +-
 ..._control_measure_classification_prompts.py |   4 +-
 ...prompts_without_context_of_other_inputs.py | 113 ------------------
 app/test_scripts/test_latency.py              |   4 +-
 .../test_no_information_provided.py           |   4 +-
 ...mmarize_control_measure_feedback_prompt.py |   2 +-
 app/test_utils/ExamplesGenerator.py           |   2 +
 app/test_utils/InputAndExpectedOutput.py      |   2 +
 app/utils/GoogleSheetsWriter.py               |   4 +-
 app/utils/LLMCaller.py                        |   2 +
 app/utils/RegexPatternMatcher.py              |   2 +
 28 files changed, 54 insertions(+), 237 deletions(-)
 delete mode 100644 app/test_scripts/test_control_measure_classification_prompts_without_context_of_other_inputs.py

diff --git a/app/data/RiskAssessment.py b/app/data/RiskAssessment.py
index 9c6a47d..7b96cc3 100644
--- a/app/data/RiskAssessment.py
+++ b/app/data/RiskAssessment.py
@@ -1,3 +1,5 @@
+# Class used to create risk assessment examples, with methods that build example-specific LLM prompts from LLM prompt templates.
+
 from typing import Type
 from ..utils.LLMCaller import *
 
diff --git a/app/data/example_risk_assessments.py b/app/data/example_risk_assessments.py
index 0e6cd8a..5f046d1 100644
--- a/app/data/example_risk_assessments.py
+++ b/app/data/example_risk_assessments.py
@@ -1,6 +1,4 @@
-# Learnings:
-# 1. Keeping a safe distance away from a possible projectile is a prevention measure.
-# The hazard event is therefore the projectile hitting someone, not the projectile being released.
+# Risk Assessments used to test the accuracy of LLM prompts.
 
 import numpy as np
 
diff --git a/app/evaluation.py b/app/evaluation.py
index 26fb66f..86b5f1b 100755
--- a/app/evaluation.py
+++ b/app/evaluation.py
@@ -26,7 +26,6 @@ class Params(TypedDict):
     is_feedback_text: bool
     is_risk_matrix: bool
     is_risk_assessment: bool
-    are_all_input_fields_entered_manually: bool
     LLM: str
 
 def provide_feedback_on_risk_matrix(response):
@@ -237,7 +236,6 @@ def evaluation_function(response: Any, answer: Any, params: Params) -> Result:
     LLM_name = params["LLM"]
     LLM = LLM_dictionary[LLM_name]
 
-    if params['are_all_input_fields_entered_manually'] == True:
     activity, hazard, how_it_harms, who_it_harms, uncontrolled_likelihood, uncontrolled_severity, uncontrolled_risk, prevention, mitigation, controlled_likelihood, controlled_severity, controlled_risk = np.array(response).flatten()
 
     RA = RiskAssessment(activity=activity, hazard=hazard, who_it_harms=who_it_harms, how_it_harms=how_it_harms,
@@ -393,115 +391,4 @@ def evaluation_function(response: Any, answer: Any, params: Params) -> Result:
     {feedback_for_correct_answers} \n\n\n\n\n
     {no_information_provided_message}'''
 
     return Result(is_correct=is_everything_correct, feedback=feedback)
-
-    if params['are_all_input_fields_entered_manually'] == False:
-
-        prevention, mitigation = np.array(response).flatten()
-
-        activity = 'Heat transfer lab'
-        hazard = 'Boiling hot water'
-        who_it_harms = 'Students'
-        how_it_harms = 'Burns'
-
-        hazard_event = 'Boiling hot water split on student'
-        harm_caused = 'Burns'
-
-        RA = RiskAssessment(activity=activity, hazard=hazard, who_it_harms=who_it_harms, how_it_harms=how_it_harms,
-            uncontrolled_likelihood=1, uncontrolled_severity=1,
-            uncontrolled_risk=1, prevention=prevention, mitigation=mitigation,
-            controlled_likelihood=1, controlled_severity=1, controlled_risk=1,
-            prevention_prompt_expected_class='prevention', mitigation_prompt_expected_class='mitigation', risk_domain='')
-
-        input_check_feedback_message = RA.get_input_check_feedback_message()
-
-        if input_check_feedback_message != True:
-            return Result(is_correct=False,
-                feedback=f'''\n\n\n\n\n # Feedback:\n\n\n\n\n
-                \n\n\n\n\n## {input_check_feedback_message}\n\n\n\n\n''')
-
-        feedback_for_incorrect_answers = '\n\n\n\n# Feedback for Incorrect Answers\n\n\n\n'
-        feedback_for_correct_answers = '\n\n\n\n# Feedback for Correct Answers\n\n\n\n'
-
-        fields_for_which_no_information_provided = []
-
-        is_everything_correct = True
-
-        # PREVENTION CHECKS
-        no_information_provided_for_prevention_prompt_input = RA.get_no_information_provided_for_prevention_input()
-        no_information_provided_for_prevention_prompt_output, no_information_provided_for_prevention_pattern = RA.get_prompt_output_and_pattern_matched(prompt_input_object=no_information_provided_for_prevention_prompt_input, LLM_caller=LLM)
-
-        if no_information_provided_for_prevention_pattern == 'no information provided' or RA.prevention == '':
-            fields_for_which_no_information_provided.append('Prevention')
-
-        else:
-
-            control_measure_prompt_with_prevention_input = RA.get_control_measure_prompt_with_prevention_input()
-            control_measure_prompt_with_prevention_output, control_measure_prompt_with_prevention_pattern = RA.get_prompt_output_and_pattern_matched(prompt_input_object=control_measure_prompt_with_prevention_input,
-                LLM_caller=LLM,
-                harm_caused=harm_caused,
-                hazard_event=hazard_event)
-
-            feedback_for_correct_answers, feedback_for_incorrect_answers, is_everything_correct = provide_feedback_on_control_measure_input(
-                control_measure_input_field='prevention',
-                control_measure_prompt_input=control_measure_prompt_with_prevention_input,
-                control_measure_prompt_output=control_measure_prompt_with_prevention_output,
-                control_measure_prompt_pattern=control_measure_prompt_with_prevention_pattern,
-                feedback_for_correct_answers=feedback_for_correct_answers,
-                feedback_for_incorrect_answers=feedback_for_incorrect_answers,
-                is_everything_correct=is_everything_correct,
-                risk_assessment=RA,
-                LLM_caller=LLM
-            )
-
-        # MITIGATION CHECKS
-        no_information_provided_for_mitigation_prompt_input = RA.get_no_information_provided_for_mitigation_input()
-        no_information_provided_for_mitigation_prompt_output, no_information_provided_for_mitigation_pattern = RA.get_prompt_output_and_pattern_matched(prompt_input_object=no_information_provided_for_mitigation_prompt_input, LLM_caller=LLM)
-
-        if no_information_provided_for_mitigation_pattern == 'no information provided' or RA.mitigation == '':
-            fields_for_which_no_information_provided.append('Mitigation')
-        else:
-
-            control_measure_prompt_with_mitigation_input = RA.get_control_measure_prompt_with_mitigation_input()
-            control_measure_prompt_with_mitigation_output, control_measure_prompt_with_mitigation_pattern = RA.get_prompt_output_and_pattern_matched(prompt_input_object=control_measure_prompt_with_mitigation_input,
-                LLM_caller=LLM,
-                harm_caused=harm_caused,
-                hazard_event=hazard_event)
-
-            feedback_for_correct_answers, feedback_for_incorrect_answers, is_everything_correct = provide_feedback_on_control_measure_input(
-                control_measure_input_field='mitigation',
-                control_measure_prompt_input=control_measure_prompt_with_mitigation_input,
-                control_measure_prompt_output=control_measure_prompt_with_mitigation_output,
-                control_measure_prompt_pattern=control_measure_prompt_with_mitigation_pattern,
-                feedback_for_correct_answers=feedback_for_correct_answers,
-                feedback_for_incorrect_answers=feedback_for_incorrect_answers,
-                is_everything_correct=is_everything_correct,
-                risk_assessment=RA,
-                LLM_caller=LLM
-            )
-
-        if is_everything_correct == True:
-            feedback_for_incorrect_answers = '# Congratulations! All your answers are correct!'
-
-        if fields_for_which_no_information_provided == []:
-            no_information_provided_message = ''
-        else:
-            no_information_provided_message = f'\n\n\n\n\n## Fields for which no information is provided and hence no feedback given: {", ".join(fields_for_which_no_information_provided)}\n\n\n\n\n'
-
-        if fields_for_which_no_information_provided != ['Prevention', 'Mitigation']:
-            hazard_event_and_harm_caused_inferred_message = f'''## The following were inferred from your answers: \n\n\n\n\n
-            \n\n\n\n\n### Event that leads to harm: "{hazard_event}"\n\n\n\n\n
-            \n\n\n\n\n### Harm caused to '{RA.who_it_harms}': "{harm_caused}".\n\n\n\n
-            \n\n\n\n\n### If they are incorrect, please make these more explicit in the "Hazard" and "How it harms" fields.\n\n\n\n\n'''
-        else:
-            hazard_event_and_harm_caused_inferred_message = ''
-
-        feedback_for_correct_answers += f'''
-        \n\n\n\n### There are no errors in your likelihood, severity, and risk values.\n\n\n\n'''
-
-        feedback=f'''{hazard_event_and_harm_caused_inferred_message} \n\n\n\n\n
-        {feedback_for_incorrect_answers} \n\n\n\n\n
-        {feedback_for_correct_answers} \n\n\n\n\n
-        {no_information_provided_message}'''
-
-        return Result(is_correct=is_everything_correct, feedback=feedback)
\ No newline at end of file
diff --git a/app/prompts/BasePromptInput.py b/app/prompts/BasePromptInput.py
index 919181f..78e7cfd 100644
--- a/app/prompts/BasePromptInput.py
+++ b/app/prompts/BasePromptInput.py
@@ -1,3 +1,5 @@
+# Base class that other PromptInput classes inherit from.
+
 try:
     from utils.RegexPatternMatcher import RegexPatternMatcher
 except:
diff --git a/app/prompts/ControlMeasureClassification.py b/app/prompts/ControlMeasureClassification.py
index 00a09d3..dcbaf3d 100644
--- a/app/prompts/ControlMeasureClassification.py
+++ b/app/prompts/ControlMeasureClassification.py
@@ -1,3 +1,5 @@
+# PromptInput class used to classify a control measure as a prevention measure, a mitigation measure, both, or neither. This prompt takes the "event that leads to harm" and the "harm caused" as input.
+
 from ..prompts.BasePromptInput import BasePromptInput
 from ..utils.RegexPatternMatcher import RegexPatternMatcher
 
diff --git a/app/prompts/HarmCausedAndHazardEvent.py b/app/prompts/HarmCausedAndHazardEvent.py
index 8f97ffa..d85104f 100644
--- a/app/prompts/HarmCausedAndHazardEvent.py
+++ b/app/prompts/HarmCausedAndHazardEvent.py
@@ -1,3 +1,5 @@
+# PromptInput class used to infer the "event that leads to harm" and the "harm caused" from the student's risk assessment inputs.
+
 from .BasePromptInput import BasePromptInput
 
 class HarmCausedAndHazardEvent(BasePromptInput):
diff --git a/app/prompts/HowItHarmsInContext.py b/app/prompts/HowItHarmsInContext.py
index 25b6de6..2d9012d 100644
--- a/app/prompts/HowItHarmsInContext.py
+++ b/app/prompts/HowItHarmsInContext.py
@@ -1,3 +1,5 @@
+# PromptInput class that checks whether the "How it harms" input matches the "activity" and "hazard" inputs.
+
 from ..prompts.BasePromptInput import BasePromptInput
 from ..utils.RegexPatternMatcher import RegexPatternMatcher
 
diff --git a/app/prompts/NoInformationProvided.py b/app/prompts/NoInformationProvided.py
index 976fdcd..c023f7d 100644
--- a/app/prompts/NoInformationProvided.py
+++ b/app/prompts/NoInformationProvided.py
@@ -1,3 +1,5 @@
+# PromptInput class that checks whether no information is provided in the "prevention" or "mitigation" input fields.
+
 from .BasePromptInput import BasePromptInput
 
 class NoInformationProvided(BasePromptInput):
diff --git a/app/prompts/SummarizeControlMeasureFeedback.py b/app/prompts/SummarizeControlMeasureFeedback.py
index 736b5cc..e63188b 100644
--- a/app/prompts/SummarizeControlMeasureFeedback.py
+++ b/app/prompts/SummarizeControlMeasureFeedback.py
@@ -1,3 +1,5 @@
+# PromptInput class that takes the output of the ControlMeasureClassification prompt and shortens it to three sentences.
+
 from ..prompts.BasePromptInput import BasePromptInput
 
 class SummarizeControlMeasureFeedback(BasePromptInput):
diff --git a/app/prompts/WhoItHarmsInContext.py b/app/prompts/WhoItHarmsInContext.py
index b2a17a6..7034a7d 100644
--- a/app/prompts/WhoItHarmsInContext.py
+++ b/app/prompts/WhoItHarmsInContext.py
@@ -1,3 +1,5 @@
+# PromptInput class that checks whether the "Who it harms" input matches the "activity", "hazard" and "how it harms" inputs.
+
 from ..prompts.BasePromptInput import BasePromptInput
 from ..utils.RegexPatternMatcher import RegexPatternMatcher
 
diff --git a/app/test_classes/BaseTestClass.py b/app/test_classes/BaseTestClass.py
index 1e236ee..7d3eb92 100644
--- a/app/test_classes/BaseTestClass.py
+++ b/app/test_classes/BaseTestClass.py
@@ -1,3 +1,5 @@
+# Base class used to test the accuracy of different prompts.
+
 from ..utils.LLMCaller import LLMCaller
 from ..utils.RegexPatternMatcher import RegexPatternMatcher
 
diff --git a/app/test_classes/TestBothPreventionAndMitigationInput.py b/app/test_classes/TestBothPreventionAndMitigationInput.py
index ff4c75b..7f0882a 100644
--- a/app/test_classes/TestBothPreventionAndMitigationInput.py
+++ b/app/test_classes/TestBothPreventionAndMitigationInput.py
@@ -1,3 +1,5 @@
+# Builds on TestControlMeasureClassificationPrompt.py to test the percentage of cases in which both the prevention and mitigation inputs are classified correctly.
+
 from ..test_classes.TestControlMeasureClassificationPrompt import TestControlMeasureClassificationPrompt
 from ..utils.LLMCaller import LLMCaller
 import numpy as np
diff --git a/app/test_classes/TestControlMeasureClassificationPrompt.py b/app/test_classes/TestControlMeasureClassificationPrompt.py
index 828f161..37cf5a7 100644
--- a/app/test_classes/TestControlMeasureClassificationPrompt.py
+++ b/app/test_classes/TestControlMeasureClassificationPrompt.py
@@ -1,3 +1,5 @@
+# Builds on TestModelAccuracyForCombinationOfPrompts.py to test the accuracy of control measure classification.
+
 from ..test_classes.TestModelAccuracyForCombinationOfPrompts import TestModelAccuracyForCombinationOfPrompts
 from ..utils.LLMCaller import LLMCaller
 
diff --git a/app/test_classes/TestModelAccuracy.py b/app/test_classes/TestModelAccuracy.py
index 2a8ce9b..470b2df 100644
--- a/app/test_classes/TestModelAccuracy.py
+++ b/app/test_classes/TestModelAccuracy.py
@@ -1,3 +1,5 @@
+# Builds on BaseTestClass to allow testing of multiple risk assessment examples (from data/example_risk_assessments.py).
+
 from ..test_classes.BaseTestClass import BaseTestClass
 from ..utils.LLMCaller import LLMCaller
 import pandas as pd
diff --git a/app/test_classes/TestModelAccuracyForCombinationOfPrompts.py b/app/test_classes/TestModelAccuracyForCombinationOfPrompts.py
index c693d7c..f6c5663 100644
--- a/app/test_classes/TestModelAccuracyForCombinationOfPrompts.py
+++ b/app/test_classes/TestModelAccuracyForCombinationOfPrompts.py
@@ -1,3 +1,5 @@
+# Builds on the TestModelAccuracy class to test the accuracy of multiple prompts used in sequence, e.g. the HarmCausedAndHazardEvent and ControlMeasureClassification prompts.
+
 from ..test_classes.TestModelAccuracy import TestModelAccuracy
 from ..utils.LLMCaller import LLMCaller
 
diff --git a/app/test_classes/TestPromptOnSingleExample.py b/app/test_classes/TestPromptOnSingleExample.py
index 261696c..348a479 100644
--- a/app/test_classes/TestPromptOnSingleExample.py
+++ b/app/test_classes/TestPromptOnSingleExample.py
@@ -1,3 +1,5 @@
+# Builds on BaseTestClass.py to test a prompt on a single risk assessment example (used in unit tests).
+
 from BaseTestClass import BaseTestClass
 from ..utils.LLMCaller import LLMCaller
 
diff --git a/app/test_scripts/risk_domain_test_for_how_it_harms_prompt.py b/app/test_scripts/risk_domain_test_for_how_it_harms_prompt.py
index d9a9462..acb20b1 100644
--- a/app/test_scripts/risk_domain_test_for_how_it_harms_prompt.py
+++ b/app/test_scripts/risk_domain_test_for_how_it_harms_prompt.py
@@ -1,4 +1,5 @@
-# python -m app.test_scripts.risk_domain_test_for_how_it_harms_prompt
+# Script that tests whether the "how it harms" input is from the same risk domain as the "activity" and "hazard" inputs.
+# To run, enter in terminal: python -m app.test_scripts.risk_domain_test_for_how_it_harms_prompt
 
 from ..test_classes.TestModelAccuracy import TestModelAccuracy
 from ..test_utils.ExamplesGenerator import ExamplesGeneratorFromCorrectExamples
diff --git a/app/test_scripts/risk_domain_test_for_who_it_harms_prompt.py b/app/test_scripts/risk_domain_test_for_who_it_harms_prompt.py
index fc48413..4d9a07b 100644
--- a/app/test_scripts/risk_domain_test_for_who_it_harms_prompt.py
+++ b/app/test_scripts/risk_domain_test_for_who_it_harms_prompt.py
@@ -1,4 +1,6 @@
-# python -m app.test_scripts.risk_domain_test_for_who_it_harms_prompt
+# Script that tests whether the "who it harms" input is from the same risk domain as the "activity" and "hazard" inputs.
+
+# To run, enter in terminal: python -m app.test_scripts.risk_domain_test_for_who_it_harms_prompt
 
 from ..test_classes.TestModelAccuracy import TestModelAccuracy
 from ..test_utils.ExamplesGenerator import ExamplesGeneratorFromCorrectExamples
diff --git a/app/test_scripts/test_control_measure_classification_prompts.py b/app/test_scripts/test_control_measure_classification_prompts.py
index c86450d..dacaf5b 100644
--- a/app/test_scripts/test_control_measure_classification_prompts.py
+++ b/app/test_scripts/test_control_measure_classification_prompts.py
@@ -1,4 +1,6 @@
-# python -m app.test_scripts.test_control_measure_classification_prompts
+# To run, enter in terminal: python -m app.test_scripts.test_control_measure_classification_prompts
+
+# Script that tests the accuracy of the control measure classification prompt and performs an ablation study on the impact of few-shot and chain-of-thought prompting on accuracy.
 
 from ..test_classes.TestBothPreventionAndMitigationInput import TestBothPreventionAndMitigationInput
 from ..test_classes.TestPreventionInput__ControlMeasureClassifiationPrompt import TestPreventionInput__ControlMeasureClassifiationPrompt
diff --git a/app/test_scripts/test_control_measure_classification_prompts_without_context_of_other_inputs.py b/app/test_scripts/test_control_measure_classification_prompts_without_context_of_other_inputs.py
deleted file mode 100644
index 1bfa364..0000000
--- a/app/test_scripts/test_control_measure_classification_prompts_without_context_of_other_inputs.py
+++ /dev/null
@@ -1,113 +0,0 @@
-# python -m app.test_scripts.test_control_measure_classification_prompts_without_context_of_other_inputs
-
-from ..test_classes.TestModelAccuracy import TestModelAccuracy
-from ..utils.LLMCaller import *
-from ..test_utils.ExamplesGenerator import RiskAssessmentExamplesGeneratorForSinglePrompt
-from ..data.example_risk_assessments import *
-
-def test_control_measure_classification_prompt(risk_assessments_dict,
-                                               LLM,
-                                               ground_truth_parameter,
-                                               method_to_get_prompt_input,
-                                               sheet_name,
-                                               is_first_test=False):
-
-    examples = RiskAssessmentExamplesGeneratorForSinglePrompt(
-        risk_assessments=risk_assessments_dict['risk_assessments'],
-        ground_truth_parameter=ground_truth_parameter,
-        method_to_get_prompt_input=method_to_get_prompt_input
-    )
-
-    test = TestModelAccuracy(
-        LLM=LLM,
-        list_of_input_and_expected_outputs=examples.get_input_and_expected_output_list(),
-        sheet_name=sheet_name,
-        examples_gathered_or_generated_message='Risk Assessments gathered from students',
-        domain=risk_assessments_dict['risk_domain'],
-        is_first_test=is_first_test
-    )
-
-    test.run_test()
-
-def test_prevention_classification_prompt(risk_assessments_dict, LLM, is_first_test=False):
-
-    test_control_measure_classification_prompt(
-        risk_assessments_dict=risk_assessments_dict,
-        LLM=LLM,
-        ground_truth_parameter='prevention_classification_prompt_ground_truth',
-        method_to_get_prompt_input='get_prevention_classification_prompt_input',
-        sheet_name='Prevention Classification 2',
-        is_first_test=is_first_test
-    )
-
-def test_mitigation_classification_prompt(risk_assessments_dict, LLM, is_first_test=False):
-
-    test_control_measure_classification_prompt(
-        risk_assessments_dict=risk_assessments_dict,
-        LLM=LLM,
-        ground_truth_parameter='mitigation_classification_prompt_ground_truth',
-        method_to_get_prompt_input='get_mitigation_classification_prompt_input',
-        sheet_name='Mitigation Classification',
-        is_first_test=is_first_test
-    )
-
-if __name__ == '__main__':
-    # test_prevention_classification_prompt(
-    #     risk_assessments_dict=physical_risks_to_individuals__data_gathered_from_version_1_deployment,
-    #     LLM=MistralLarge(temperature=0.1),
-    #     is_first_test=True
-    # )
-
-    # test_prevention_classification_prompt(
-    #     risk_assessments_dict=natural_disaster_risks,
-    #     LLM=MistralLarge(temperature=0.1),
-    #     is_first_test=False
-    # )
-
-    # test_prevention_classification_prompt(
-    #     risk_assessments_dict=cybersecurity_risks,
-    #     LLM=MistralLarge(temperature=0.1),
-    #     is_first_test=False
-    # )
-
-    # test_prevention_classification_prompt(
-    #     risk_assessments_dict=terrorism_risks,
-    #     LLM=MistralLarge(temperature=0.1),
-    #     is_first_test=False
-    # )
-
-    # test_prevention_classification_prompt(
-    #     risk_assessments_dict=biohazard_risks,
-    #     LLM=MistralLarge(temperature=0.1),
-    #     is_first_test=False
-    # )
-
-    # test_mitigation_classification_prompt(
-    #     risk_assessments_dict=physical_risks_to_individuals__data_gathered_from_version_1_deployment,
-    #     LLM=MistralLarge(temperature=0.1),
-    #     is_first_test=False
-    # )
-
-    test_mitigation_classification_prompt(
-        risk_assessments_dict=natural_disaster_risks,
-        LLM=MistralLarge(temperature=0.1),
-        is_first_test=False
-    )
-
-    # test_mitigation_classification_prompt(
-    #     risk_assessments_dict=cybersecurity_risks,
-    #     LLM=MistralLarge(temperature=0.1),
-    #     is_first_test=False
-    # )
-
-    # test_mitigation_classification_prompt(
-    #     risk_assessments_dict=terrorism_risks,
-    #     LLM=MistralLarge(temperature=0.1),
-    #     is_first_test=False
-    # )
-
-    # test_mitigation_classification_prompt(
-    #     risk_assessments_dict=biohazard_risks,
-    #     LLM=MistralLarge(temperature=0.1),
-    #     is_first_test=False
-    # )
\ No newline at end of file
diff --git a/app/test_scripts/test_latency.py b/app/test_scripts/test_latency.py
index db0c042..b769f3a 100644
--- a/app/test_scripts/test_latency.py
+++ b/app/test_scripts/test_latency.py
@@ -1,4 +1,6 @@
-# python -m app.test_scripts.test_latency
+# Script which tests the latency of different LLMs.
+
+# To run, enter in terminal: python -m app.test_scripts.test_latency
 
 from ..utils.LLMCaller import *
 from ..evaluation import evaluation_function, Params
diff --git a/app/test_scripts/test_no_information_provided.py b/app/test_scripts/test_no_information_provided.py
index 71992eb..9eb0a56 100644
--- a/app/test_scripts/test_no_information_provided.py
+++ b/app/test_scripts/test_no_information_provided.py
@@ -1,4 +1,6 @@
-# python -m app.test_scripts.test_no_information_provided
+# Script which tests the accuracy of the NoInformationProvided.py prompt.
+
+# To run, enter in terminal: python -m app.test_scripts.test_no_information_provided
 
 from ..test_utils.InputAndExpectedOutput import InputAndExpectedOutputForSinglePrompt
 from ..utils.LLMCaller import *
diff --git a/app/test_scripts/test_summarize_control_measure_feedback_prompt.py b/app/test_scripts/test_summarize_control_measure_feedback_prompt.py
index 5c1aa81..84a1ed9 100644
--- a/app/test_scripts/test_summarize_control_measure_feedback_prompt.py
+++ b/app/test_scripts/test_summarize_control_measure_feedback_prompt.py
@@ -1,4 +1,4 @@
-# python -m app.test_scripts.test_summarize_control_measure_feedback_prompt
+# To run, enter in terminal: python -m app.test_scripts.test_summarize_control_measure_feedback_prompt
 
 from ..utils.LLMCaller import *
 from ..test_classes.TestSummarizeControlMeasureFeedback import TestSummarizePreventionFeedback, TestSummarizeMitigationFeedback
diff --git a/app/test_utils/ExamplesGenerator.py b/app/test_utils/ExamplesGenerator.py
index 136116d..45d2b54 100644
--- a/app/test_utils/ExamplesGenerator.py
+++ b/app/test_utils/ExamplesGenerator.py
@@ -1,3 +1,5 @@
+# Series of classes used to create InputAndExpectedOutput objects for different tests.
+
 from .InputAndExpectedOutput import InputAndExpectedOutputForSinglePrompt, InputAndExpectedOutputForCombinedPrompts
 
 class ExamplesGenerator:
diff --git a/app/test_utils/InputAndExpectedOutput.py b/app/test_utils/InputAndExpectedOutput.py
index bcd8dde..d290d56 100644
--- a/app/test_utils/InputAndExpectedOutput.py
+++ b/app/test_utils/InputAndExpectedOutput.py
@@ -1,3 +1,5 @@
+# Class which contains a prompt input object and the expected output for the prompt.
+
 try:
     from ..prompts.BasePromptInput import BasePromptInput
     from ..data.RiskAssessment import RiskAssessment
diff --git a/app/utils/GoogleSheetsWriter.py b/app/utils/GoogleSheetsWriter.py
index fc24cdb..31427c2 100644
--- a/app/utils/GoogleSheetsWriter.py
+++ b/app/utils/GoogleSheetsWriter.py
@@ -1,7 +1,7 @@
-import csv
+# Class used to write test results to Google Sheets.
+
 from google.oauth2 import service_account
 from googleapiclient.discovery import build
-from datetime import datetime
 
 class GoogleSheetsWriter:
     def __init__(self, sheet_name):
diff --git a/app/utils/LLMCaller.py b/app/utils/LLMCaller.py
index 2bde758..7b980cf 100644
--- a/app/utils/LLMCaller.py
+++ b/app/utils/LLMCaller.py
@@ -1,3 +1,5 @@
+# Series of classes built on the LLMCaller base class, used to get LLM outputs from prompt inputs.
+
 import openai
 import requests
 import anthropic
diff --git a/app/utils/RegexPatternMatcher.py b/app/utils/RegexPatternMatcher.py
index 38df661..caaa62e 100644
--- a/app/utils/RegexPatternMatcher.py
+++ b/app/utils/RegexPatternMatcher.py
@@ -1,3 +1,5 @@
+# Class used to extract classifications from LLM outputs, e.g. prevention/mitigation for the ControlMeasureClassification prompt.
+
 import re
 
 class HarmCausedAndHazardEventAndHazardEvent: