Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
179 changes: 140 additions & 39 deletions src/dashboard_generation.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
"""
This script generates a markdown file with the dashboard of the validation
reports. The dashboard includes the terms and relationships reports.
"""
import argparse
from datetime import date

Expand All @@ -6,54 +10,131 @@

from readme_reports_generation import tsv2md

# HTML snippets wrapped around a value to colour it in the generated markdown.
PASS = "<font color='green'>"
FAIL = "<font color='red'>"
CLOSE_TAG = "</font>"
# Common prefix of the report files; report kind and date stamp are appended.
BASE_PATH = "../reports/report_"
# Read the clock once so both stamps always describe the same day, even when
# the module is imported right at midnight.
_TODAY = date.today()
DATE_FILE = _TODAY.strftime("%Y%m%d")
DATE = _TODAY.strftime("%Y-%m-%d")


def clean_up(report):
    """
    Cleans up the report by dropping unnecessary columns.

    The two "percent_indirect_*" relationship columns are removed. The
    frame passed in is not modified; ``drop`` returns a new DataFrame.

    Args:
        report (pandas.DataFrame): The report to clean up. Both columns
            must be present, otherwise pandas raises ``KeyError``.

    Returns:
        pandas.DataFrame: The cleaned up report.
    """
    return report.drop(
        columns=[
            "percent_indirect_AS-AS_relationship",
            "percent_indirect_CT-CT_relationship",
        ]
    )


def add_link(report):
    """
    Adds links to the report.

    Every value in the "Table" column, except the "Total" row, is turned
    into a markdown link pointing at ``<table>/README.md``.

    Args:
        report (pandas.DataFrame): The report to add links to. It is
            modified in place.

    Returns:
        pandas.DataFrame: The same report object with links added.
    """
    for row in report.itertuples():
        row_table = row.Table
        # The summary row has no README of its own, so it stays plain text.
        if row_table != "Total":
            report.at[row.Index, "Table"] = (
                f"[{row_table}]({row_table}/README.md)"
            )

    return report


def check_number_n_get_color(number):
    """
    Checks the number and returns it wrapped in the matching color tag.

    An int greater than zero is wrapped in the FAIL tag, any other int in
    the PASS tag. A float below 50.0 is wrapped in the PASS tag, any other
    float in the FAIL tag.

    Args:
        number (int or float): The number to check.

    Returns:
        str: The number between the color tag and CLOSE_TAG. A value that
        is neither an int nor a float is returned unchanged.
    """
    if isinstance(number, int):
        tag = FAIL if number > 0 else PASS
        return f"{tag}{number}{CLOSE_TAG}"
    if isinstance(number, float):
        # NaN compares False against 50.0, so it ends up in the FAIL tag.
        tag = PASS if number < 50.0 else FAIL
        return f"{tag}{number}{CLOSE_TAG}"
    # Anything else (for instance a cell that was blanked with "") is
    # handed back as-is instead of silently becoming None.
    return number


def add_color(report, report_type):
    """
    Adds color tags to the report based on the report type.

    Each cell of the affected columns is replaced by the result of
    check_number_n_get_color. Columns are read and written by name, so the
    outcome does not depend on where a column sits in the frame.

    Args:
        report (pandas.DataFrame): The report to add color tags to. It is
            modified in place.
        report_type (str): The type of report ("terms" or "relations").
            Any other value leaves the report untouched.

    Returns:
        pandas.DataFrame: The same report object with color tags added.
    """
    if report_type == "terms":
        plain_columns = (
            "AS_invalid_term_percent",
            "CT_invalid_term_percent",
            "invalid_terms_percent",
        )
        int_columns = ()
    elif report_type == "relations":
        plain_columns = (
            "percent_invalid_AS-AS_relationship",
            "percent_invalid_CT-CT_relationship",
            "percent_invalid_CT-AS_relationship",
            "number_of_no_parent_relationships",
        )
        int_columns = ("unique_no_parent_relationships",)
    else:
        return report

    for column in plain_columns:
        # tolist() hands over plain Python scalars, which is what the
        # int/float checks in check_number_n_get_color look for. Replacing
        # the whole column also avoids writing strings cell by cell into a
        # numeric column.
        report[column] = [
            check_number_n_get_color(value)
            for value in report[column].tolist()
        ]
    for column in int_columns:
        # Cast first so the cell is judged by the integer rule.
        report[column] = [
            check_number_n_get_color(int(value))
            for value in report[column].tolist()
        ]

    return report

def get_reports(date):
BASE_PATH = "../reports/report_"

ter_report = pd.read_csv(f"{BASE_PATH}terms_{date}.tsv", sep='\t')
def get_reports():
"""
Retrieves the terms and relationships reports for the current date.

Takes no arguments: the file names are built from the module-level
BASE_PATH and DATE_FILE (the current date as "YYYYMMDD") constants.

Returns:
tuple: A tuple containing the terms report and the relationships
report.
"""
ter_report = pd.read_csv(f"{BASE_PATH}terms_{DATE_FILE}.tsv", sep="\t")
ter_report.sort_values(by=["Table"], inplace=True)
ter_report.loc["Total"] = ter_report.sum()
ter_report.loc[ter_report.index[-1], "Table"] = "Total"
Expand All @@ -77,13 +158,21 @@ def get_reports(date):
ter_report = add_link(ter_report)
ter_report_md = tsv2md(ter_report)

rel_report = pd.read_csv(f"{BASE_PATH}relationship_{date}.tsv", sep='\t')
rel_report = pd.read_csv(
f"{BASE_PATH}relationship_{DATE_FILE}.tsv", sep="\t"
)
rel_report.sort_values(by=["Table"], inplace=True)
rel_report.loc["Total"] = rel_report.sum()
rel_report.loc[rel_report.index[-1], "Table"] = "Total"
rel_report.loc[rel_report.index[-1], "percent_invalid_AS-AS_relationship"] = ""
rel_report.loc[rel_report.index[-1], "percent_invalid_CT-CT_relationship"] = ""
rel_report.loc[rel_report.index[-1], "percent_invalid_CT-AS_relationship"] = ""
rel_report.loc[
rel_report.index[-1], "percent_invalid_AS-AS_relationship"
] = ""
rel_report.loc[
rel_report.index[-1], "percent_invalid_CT-CT_relationship"
] = ""
rel_report.loc[
rel_report.index[-1], "percent_invalid_CT-AS_relationship"
] = ""
rel_report = clean_up(rel_report.reset_index(drop=True))
rel_report = add_color(rel_report, "relations")
rel_report.rename(columns={
Expand All @@ -98,19 +187,30 @@ def get_reports(date):
}, inplace=True)
rel_report = add_link(rel_report)
rel_report_md = tsv2md(rel_report)

return ter_report_md, rel_report_md


def generate_dashboard(output):
DATE_FILE = date.today().strftime("%Y%m%d")
DATE = date.today().strftime('%Y-%m-%d')
"""
Generates the validation dashboard.

Args:
output (str): The output file path.
"""
template = MdUtils(
file_name=output, title=f"Validation Dashboard ({DATE})"
)

terms_report, rel_report = get_reports()

template = MdUtils(file_name=output, title=f'Validation Dashboard ({DATE})')

terms_report, rel_report = get_reports(DATE_FILE)

template.new_header(level=1, title="Terms")
template.new_paragraph(text="Invalid AS or CT terms include terms not from UBERON or CL ontologies. Also, it includes terms without ID.")
template.new_paragraph(
text=(
"""Invalid AS or CT terms include terms not from UBERON or CL
ontologies. Also, it includes terms without ID."""
)
)
template.new_paragraph(text=terms_report)

template.new_paragraph(text="\n\n")
Expand All @@ -119,12 +219,13 @@ def generate_dashboard(output):
template.new_paragraph(text=rel_report)

template.new_paragraph(text="\n\n")

template.create_md_file()

if __name__ == "__main__":
    # Command-line entry point: build the dashboard once and write it to
    # the path given with -o/--output.
    parser = argparse.ArgumentParser()
    # generate_dashboard uses this value as the markdown file name, so a
    # missing path is rejected while parsing the arguments.
    parser.add_argument(
        "-o", "--output", required=True, help="output file path"
    )

    args = parser.parse_args()
    generate_dashboard(args.output)
110 changes: 71 additions & 39 deletions src/graph_construct.py
Original file line number Diff line number Diff line change
@@ -1,41 +1,73 @@
import argparse, json
"""
This module contains the functions to merge the suggestion and the validated
graphs from the JSON file. Plus change the style of the edges in the graph.
"""
import argparse
import json


def merge_json(file, sec, param, value):
    """
    Merge the JSON file with additional relationships.

    Every edge of the first graph in ``file`` that also occurs in the
    first graph of ``sec`` (same "sub", "pred" and "obj") gets its "meta"
    entry set to a style property built from ``param`` and ``value``.

    Args:
        file (dict): The JSON file with the validated relationships. It is
            modified in place.
        sec (dict): The JSON file with the suggestion relationships
            for the non-valid relations.
        param (str): The property name.
        value (str): The value of the param.

    Returns:
        dict: The merged JSON file (the same object as ``file``).
    """
    style_prop = {
        "basicPropertyValues": [
            {
                "pred": f"https://w3id.org/kgviz/{param}",
                "val": f"{value}"
            }
        ]
    }
    sec_edges = sec["graphs"][0].get("edges")
    if not sec_edges:
        return file

    # Index the secondary edges once so each edge of ``file`` is checked
    # with a single set lookup instead of a scan over all of them.
    # NOTE(review): assumes "sub"/"pred"/"obj" hold hashable values
    # (strings) - confirm against the JSON this script is fed.
    wanted = {
        (s_edge["sub"], s_edge["pred"], s_edge["obj"])
        for s_edge in sec_edges
    }
    # A graph without an "edges" key is treated like one with no edges.
    for f_edge in file["graphs"][0].get("edges") or []:
        if (f_edge["sub"], f_edge["pred"], f_edge["obj"]) in wanted:
            # Every matching edge points at the same style_prop dict.
            f_edge["meta"] = style_prop

    return file


def main(args_params):
    """
    Main function to merge JSON files.

    Loads the input and the secondary JSON files, merges them with
    merge_json and writes the result to the output path as UTF-8 JSON
    indented by two spaces.

    Args:
        args_params (argparse.Namespace): Command-line arguments. The
            ``input``, ``sec``, ``output``, ``param`` and ``value``
            attributes are read.

    Returns:
        None
    """
    with open(args_params.input, encoding="utf-8") as f:
        file = json.load(f)

    with open(args_params.sec, encoding="utf-8") as f:
        sec = json.load(f)

    output = merge_json(file, sec, args_params.param, args_params.value)

    with open(args_params.output, "w", encoding="utf-8") as f:
        # ensure_ascii=False keeps non-ASCII characters readable in the file.
        json.dump(output, f, ensure_ascii=False, indent=2)


if __name__ == "__main__":
    # Five positional arguments, declared in the order they must be given
    # on the command line.
    cli = argparse.ArgumentParser()
    for arg_name, arg_help in (
        ("input", "input json file"),
        ("sec", "json with additional relationships"),
        ("output", "output file"),
        ("param", "property name"),
        ("value", "value of the param"),
    ):
        cli.add_argument(arg_name, help=arg_help)

    main(cli.parse_args())
Loading