eSIMBA/main.py at master · nsgln/eSIMBA · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
"""Main script to run SIMBA and eSIMBA clustering algorithms.

Possible command-line arguments are:
    1. Input parameters:
        - --input_path: Path to the input data directory (zip file containing npz files with graph data) **REQUIRED**.
        - --ground_truth: Flag indicating whether ground truth community is available in data files.
        - --name: Flag indicating whether node names are available in data files.
    2. Output parameters:
        - --results_path: Path to the output results directory. Default is './Results'.
        - --output_prefix: Prefix for the output files. Default is 'output'.
        - --draw: Flag to indicate whether to draw and save communities evolution plots.
    3. Execution parameters:
        - --static: Flag to run SIMBA (static) algorithm. If not set, eSIMBA (dynamic) algorithm is run by default.
        - --parallelism: Flag to enable parallel processing.
        - --debug: Flag to enable debug mode.
        - --debug_limit: Limit for debug mode (number of graphs to process). Default is 5.
        - --shuffle: Flag to shuffle the order of graphs before processing (in debug mode only).
    4. Algorithm parameters:
        - --threshold: Threshold on p-values associated with a node, used in the filtering step. Default is 0.05.
        - --min_community_size: Minimum size of communities to be detected. Default is 5.
        - --min_window_community_size: Minimum size of communities to be detected within a time window (for eSIMBA). Default is 3.
    5. Verbosity:
        - --verbose: Flag to enable verbose output.

@author: Nina Singlan"""
import os
import time

from utils.run import run_simba, run_esimba
from utils.saving import compute_and_save_statistics
from utils.utils import parse_args, validate_args

def format_execution_time(seconds):
    """Format a duration in seconds as ``"<H>h <M>m <S.ss>s"``.

    Hours and minutes are rendered as whole numbers (``1h 2m``), not as
    floats (``1.0h 2.0m``), which is what floor-dividing a float yields.

    Args:
        seconds: Elapsed time in seconds (int or float, non-negative).

    Returns:
        The human-readable duration string.
    """
    hours, remainder = divmod(seconds, 3600)
    minutes, secs = divmod(remainder, 60)
    return f"{int(hours)}h {int(minutes)}m {secs:.2f}s"


if __name__ == "__main__":
    # 1 - Start clock. perf_counter() is monotonic, so the measured duration
    # is not affected by system clock adjustments during a long run.
    global_start_time = time.perf_counter()

    # 2 - Parse and validate command-line arguments
    args = parse_args()
    args = validate_args(args)

    # 3 - Run clustering algorithm based on user input:
    # SIMBA when --static is set, eSIMBA otherwise.
    if args.static:
        statistics = run_simba(args)
    else:
        statistics = run_esimba(args)

    # 4 - Compute and save statistics to
    # <results_path>/<output_prefix>_global_execution_statistics.txt
    statistics_file_path = os.path.join(
        args.results_path,
        f"{args.output_prefix}_global_execution_statistics.txt",
    )
    compute_and_save_statistics(statistics, statistics_file_path)

    # 5 - End clock and print total execution time
    global_execution_time = time.perf_counter() - global_start_time
    if args.verbose:
        print(f"Total execution time: "
              f"{format_execution_time(global_execution_time)}")