hubmapconsortium · anitacaron · Jul 18, 2024 · Jul 18, 2024 · Jul 18, 2024 · Sep 13, 2024
diff --git a/src/dashboard_generation.py b/src/dashboard_generation.py
@@ -1,3 +1,7 @@
+"""
+This script generates a markdown file with the dashboard of the validation
+reports. The dashboard includes the terms and relationships reports.
+"""
 import argparse
 from datetime import date
 
@@ -6,54 +10,131 @@
 
 from readme_reports_generation import tsv2md
 
+PASS = "<font color='green'>"
+FAIL = "<font color='red'>"
+CLOSE_TAG = "</font>"
+BASE_PATH = "../reports/report_"
+DATE_FILE = date.today().strftime("%Y%m%d")
+DATE = date.today().strftime("%Y-%m-%d")
+
 
 def clean_up(report):
-    return report.drop(columns=["percent_indirect_AS-AS_relationship","percent_indirect_CT-CT_relationship"])
+    """
+    Cleans up the report by dropping unnecessary columns.
+
+    Args:
+        report (pandas.DataFrame): The report to clean up.
+
+    Returns:
+        pandas.DataFrame: The cleaned up report.
+    """
+    return report.drop(
+        columns=[
+            "percent_indirect_AS-AS_relationship",
+            "percent_indirect_CT-CT_relationship"
+        ]
+    )
+
 
 def add_link(report):
+    """
+    Adds links to the report.
+
+    Args:
+        report (pandas.DataFrame): The report to add links to.
+
+    Returns:
+        pandas.DataFrame: The report with links added.
+    """
     for row in report.itertuples():
         row_table = row.Table
         if row_table != "Total":
-            report.at[row.Index, "Table"] = f"[{row_table}]({row_table}/README.md)"
+            report.at[row.Index, "Table"] = (
+                f"[{row_table}]({row_table}/README.md)"
+            )
 
     return report
 
+
 def check_number_n_get_color(number):
-    PASS = "<font color='green'>"
-    FAIL = "<font color='red'>"
-    CLOSE_TAG = "</font>"
+    """
+    Checks the number and returns the corresponding color tag.
+
+    Args:
+        number (int or float): The number to check.
+
+    Returns:
+        str: The color-tagged number; other input types are returned as-is.
+    """
     if isinstance(number, int):
         if number > 0:
             return f"{FAIL}{number}{CLOSE_TAG}"
-        else:
-            return f"{PASS}{number}{CLOSE_TAG}"
-    elif isinstance(number, float):
+        return f"{PASS}{number}{CLOSE_TAG}"
+    if isinstance(number, float):
         if number < 50.0:
             return f"{PASS}{number}{CLOSE_TAG}"
-        else:
-            return f"{FAIL}{number}{CLOSE_TAG}"
-
+
+        return f"{FAIL}{number}{CLOSE_TAG}"
+    return number
+
 
 def add_color(report, report_type):
+    """
+    Adds color tags to the report based on the report type.
+
+    Args:
+        report (pandas.DataFrame): The report to add color tags to.
+        report_type (str): The type of report ("terms" or "relations").
+
+    Returns:
+        pandas.DataFrame: The report with color tags added.
+    """
     if report_type == "terms":
         for row in report.itertuples():
-            report.at[row.Index, "AS_invalid_term_percent"] = check_number_n_get_color(row.AS_invalid_term_percent)
-            report.at[row.Index, "CT_invalid_term_percent"] = check_number_n_get_color(row.CT_invalid_term_percent)
-            report.at[row.Index, "invalid_terms_percent"] = check_number_n_get_color(row.invalid_terms_percent)
+            report.at[row.Index, "AS_invalid_term_percent"] = (
+                check_number_n_get_color(row.AS_invalid_term_percent)
+            )
+            report.at[row.Index, "CT_invalid_term_percent"] = (
+                check_number_n_get_color(row.CT_invalid_term_percent)
+            )
+            report.at[row.Index, "invalid_terms_percent"] = (
+                check_number_n_get_color(row.invalid_terms_percent)
+            )
     elif report_type == "relations":
         for row in report.itertuples():
-            report.at[row.Index, "percent_invalid_AS-AS_relationship"] = check_number_n_get_color(row._3)
-            report.at[row.Index, "percent_invalid_CT-CT_relationship"] = check_number_n_get_color(row._5)
-            report.at[row.Index, "percent_invalid_CT-AS_relationship"] = check_number_n_get_color(row._7)
-            report.at[row.Index, "number_of_no_parent_relationships"] = check_number_n_get_color(row.number_of_no_parent_relationships)
-            report.at[row.Index, "unique_no_parent_relationships"] = check_number_n_get_color(int(row.unique_no_parent_relationships))
-
+            report.at[row.Index, "percent_invalid_AS-AS_relationship"] = (
+                check_number_n_get_color(row._3)
+            )
+            report.at[row.Index, "percent_invalid_CT-CT_relationship"] = (
+                check_number_n_get_color(row._5)
+            )
+            report.at[row.Index, "percent_invalid_CT-AS_relationship"] = (
+                check_number_n_get_color(row._7)
+            )
+            report.at[row.Index, "number_of_no_parent_relationships"] = (
+                check_number_n_get_color(row.number_of_no_parent_relationships)
+            )
+            report.at[row.Index, "unique_no_parent_relationships"] = (
+                check_number_n_get_color(
+                    int(row.unique_no_parent_relationships)
+                )
+            )
+
     return report
 
-def get_reports(date):
-    BASE_PATH = "../reports/report_"
 
-    ter_report = pd.read_csv(f"{BASE_PATH}terms_{date}.tsv", sep='\t')
+def get_reports():
+    """
+    Retrieves the terms and relationships reports for today's date.
+
+    Note:
+        Takes no arguments; the date comes from the module-level DATE_FILE.
+
+    Returns:
+        tuple: A tuple containing the terms report and the relationships
+                report.
+    """
+    ter_report = pd.read_csv(f"{BASE_PATH}terms_{DATE_FILE}.tsv", sep="\t")
     ter_report.sort_values(by=["Table"], inplace=True)
     ter_report.loc["Total"] = ter_report.sum()
     ter_report.loc[ter_report.index[-1], "Table"] = "Total"
@@ -77,13 +158,21 @@ def get_reports(date):
     ter_report = add_link(ter_report)
     ter_report_md = tsv2md(ter_report)
 
-    rel_report = pd.read_csv(f"{BASE_PATH}relationship_{date}.tsv", sep='\t')
+    rel_report = pd.read_csv(
+        f"{BASE_PATH}relationship_{DATE_FILE}.tsv", sep="\t"
+    )
     rel_report.sort_values(by=["Table"], inplace=True)
     rel_report.loc["Total"] = rel_report.sum()
     rel_report.loc[rel_report.index[-1], "Table"] = "Total"
-    rel_report.loc[rel_report.index[-1], "percent_invalid_AS-AS_relationship"] = ""
-    rel_report.loc[rel_report.index[-1], "percent_invalid_CT-CT_relationship"] = ""
-    rel_report.loc[rel_report.index[-1], "percent_invalid_CT-AS_relationship"] = ""
+    rel_report.loc[
+        rel_report.index[-1], "percent_invalid_AS-AS_relationship"
+    ] = ""
+    rel_report.loc[
+        rel_report.index[-1], "percent_invalid_CT-CT_relationship"
+    ] = ""
+    rel_report.loc[
+        rel_report.index[-1], "percent_invalid_CT-AS_relationship"
+    ] = ""
     rel_report = clean_up(rel_report.reset_index(drop=True))
     rel_report = add_color(rel_report, "relations")
     rel_report.rename(columns={
@@ -98,19 +187,30 @@ def get_reports(date):
     }, inplace=True)
     rel_report = add_link(rel_report)
     rel_report_md = tsv2md(rel_report)
-    
+
     return ter_report_md, rel_report_md
 
+
 def generate_dashboard(output):
-    DATE_FILE = date.today().strftime("%Y%m%d")
-    DATE = date.today().strftime('%Y-%m-%d')
+    """
+    Generates the validation dashboard.
+
+    Args:
+        output (str): The output file path.
+    """
+    template = MdUtils(
+        file_name=output, title=f"Validation Dashboard ({DATE})"
+    )
+
+    terms_report, rel_report = get_reports()
 
-    template = MdUtils(file_name=output, title=f'Validation Dashboard ({DATE})')
-
-    terms_report, rel_report = get_reports(DATE_FILE)
-
     template.new_header(level=1, title="Terms")
-    template.new_paragraph(text="Invalid AS or CT terms include terms not from UBERON or CL ontologies. Also, it includes terms without ID.")
+    template.new_paragraph(
+        text=(
+            """Invalid AS or CT terms include terms not from UBERON or CL
+                ontologies. Also, it includes terms without ID."""
+        )
+    )
     template.new_paragraph(text=terms_report)
 
     template.new_paragraph(text="\n\n")
@@ -119,12 +219,13 @@ def generate_dashboard(output):
     template.new_paragraph(text=rel_report)
 
     template.new_paragraph(text="\n\n")
-        
+
     template.create_md_file()
 
-if __name__ == '__main__':
+
+if __name__ == "__main__":
     parser = argparse.ArgumentParser()
     parser.add_argument("-o", "--output", help="output file path")
-    
+
     args = parser.parse_args()
-    generate_dashboard(args.output)
+    generate_dashboard(args.output)
diff --git a/src/graph_construct.py b/src/graph_construct.py
@@ -1,41 +1,73 @@
-import argparse, json
+"""
+This module contains the functions to merge the suggestion and the validated
+graphs from the JSON file. It also changes the style of the graph's edges.
+"""
+import argparse
+import json
+
 
 def merge_json(file, sec, param, value):
-  style_prop = {
-      "basicPropertyValues": [
-        {
-          "pred": f"https://w3id.org/kgviz/{param}",
-          "val": f"{value}"
-        }
-      ]
-    }
-  if sec["graphs"][0].get("edges"):
-    for f_edge in file["graphs"][0]["edges"]:
-      for s_edge in sec["graphs"][0]["edges"]:
-        if f_edge["sub"] == s_edge["sub"] and f_edge["pred"] == s_edge["pred"] and f_edge["obj"] == s_edge["obj"]:
-          f_edge["meta"] = style_prop
-
-  return file
-
-def main(args):
-  f = open(args.input)
-  file = json.load(f)
-  f = open(args.sec)
-  sec = json.load(f)
-
-  output = merge_json(file, sec, args.param, args.value)
-
-  with open(args.output, 'w', encoding='utf-8') as f:
-    json.dump(output, f, ensure_ascii=False, indent=2)  
-
-if __name__ == '__main__':
-  parser = argparse.ArgumentParser()
-  parser.add_argument('input', help='input json file')
-  parser.add_argument('sec', help='json with additional relationships')
-  parser.add_argument('output', help='output file')
-  parser.add_argument('param', help='property name')
-  parser.add_argument('value', help='value of the param')
-
-
-  args = parser.parse_args()
-  main(args)
+    """
+    Merge the JSON file with additional relationships.
+
+    Args:
+        file (dict): The JSON file with the validated relationships.
+        sec (dict): The JSON file with the suggestion relationships
+          for the non-valid relations.
+        param (str): The property name.
+        value (str): The value of the param.
+
+    Returns:
+        dict: The merged JSON file.
+    """
+    style_prop = {
+        "basicPropertyValues": [
+          {
+            "pred": f"https://w3id.org/kgviz/{param}",
+            "val": f"{value}"
+          }
+        ]
+      }
+    if sec["graphs"][0].get("edges"):
+        for f_edge in file["graphs"][0]["edges"]:
+            for s_edge in sec["graphs"][0]["edges"]:
+                if f_edge["sub"] == s_edge["sub"]\
+                  and f_edge["pred"] == s_edge["pred"]\
+                  and f_edge["obj"] == s_edge["obj"]:
+                    f_edge["meta"] = style_prop
+
+    return file
+
+
+def main(args_params):
+    """
+    Main function to merge JSON files.
+
+    Args:
+        args_params (argparse.Namespace): Command-line arguments.
+
+    Returns:
+        None
+    """
+    with open(args_params.input, encoding="utf-8") as f:
+        file = json.load(f)
+
+    with open(args_params.sec, encoding="utf-8") as f:
+        sec = json.load(f)
+
+    output = merge_json(file, sec, args_params.param, args_params.value)
+
+    with open(args_params.output, "w", encoding="utf-8") as f:
+        json.dump(output, f, ensure_ascii=False, indent=2)
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+    parser.add_argument("input", help="input json file")
+    parser.add_argument("sec", help="json with additional relationships")
+    parser.add_argument("output", help="output file")
+    parser.add_argument("param", help="property name")
+    parser.add_argument("value", help="value of the param")
+
+    args = parser.parse_args()
+    main(args)