diff --git a/inc/scan.h b/inc/scan.h index 1de390a..59f7986 100644 --- a/inc/scan.h +++ b/inc/scan.h @@ -81,7 +81,7 @@ void scan_data_free (scan_data_t * scan); void ldb_scan(scan_data_t * scan); match_t ldb_scan_snippets(scan_data_t *scan_ptr); -int wfp_scan(char * path, int scan_max_snippets, int scan_max_components, bool adjust_tolerance, int component_ranking_threshold, int snippet_min_hits, int snippet_min_lines, int snippet_range_tolerance, bool snippet_honor_file_extension); +int wfp_scan(char * path, int scan_max_snippets, int scan_max_components, bool adjust_tolerance, int component_ranking_threshold, int snippet_min_hits, int snippet_min_lines, int snippet_range_tolerance, bool snippet_honor_file_extension, bool report_progress); int hash_scan(char *path, int scan_max_snippets, int scan_max_components, bool adjust_tolerance, int component_ranking_threshold, int snippet_min_hits, int snippet_min_lines, int snippet_range_tolerance, bool snippet_honor_file_extension); #endif diff --git a/src/help.c b/src/help.c index 075f9fb..720d552 100644 --- a/src/help.c +++ b/src/help.c @@ -77,6 +77,13 @@ Options:\n\ -d, --debug Store debugging information to disk (/tmp).\n\ -q, --quiet Suppress JSON output (show only debugging info via STDERR).\n\ \n\ +Batch Processing:\n\ +-R, --report Enable batch mode: write progress to /tmp/engine/batch-scan/\n\ + and results to /tmp/engine/batch-result/ instead of STDOUT.\n\ +-C, --clean Clean completed status files older than 2 hours from batch-scan directory.\n\ + --batch-status ID Show scan progress/status for the given scan ID.\n\ + --batch-result ID Show scan results for the given scan ID (only if completed).\n\ +\n\ Environment variables:\n\ SCANOSS_MATCHMAP_MAX: Set the snippet scanning match map size (default: %d).\n\ SCANOSS_FILE_CONTENTS_URL: Define the API URL endpoint for sources. 
/* ------------------------------------------------------------------------
 * Batch-mode helpers (src/main.c)
 *
 * A batch scan keeps two files per scan ID:
 *   STATUS_DIR/<id>  one JSON line: {"started":TIMESTAMP,"status":"STATUS","progress":NUM}
 *   RESULT_DIR/<id>  the scan output that would otherwise go to STDOUT
 * ------------------------------------------------------------------------ */

#define STATUS_DIR "/tmp/engine/batch-scan"
#define RESULT_DIR "/tmp/engine/batch-result"
#define MAX_AGE_SECONDS (2 * 60 * 60) /* 2 hours */

/* Longest scan ID accepted; a scan ID is used as a single file name. */
#define BATCH_SCANID_MAX 255
/* Room for "<dir>/<id>" plus the terminator (RESULT_DIR is the longer directory). */
#define BATCH_PATH_MAX (sizeof(RESULT_DIR) + 1 + BATCH_SCANID_MAX)

/**
 * @brief Tells whether a scan ID can be safely used as a file name.
 * Rejects NULL, empty and over-long IDs, "." / "..", and anything holding a
 * '/' so that an ID can never address a file outside the batch directories.
 * @param scanid Candidate scan ID
 * @return true when the ID is usable
 */
static bool batch_scanid_is_valid(const char *scanid)
{
	if (!scanid || scanid[0] == '\0')
		return false;
	if (strcmp(scanid, ".") == 0 || strcmp(scanid, "..") == 0)
		return false;
	if (strchr(scanid, '/') != NULL)
		return false;
	return strlen(scanid) <= BATCH_SCANID_MAX;
}

/**
 * @brief Builds "<dir>/<scanid>" after validating the scan ID.
 * @param out Output buffer
 * @param out_size Size of the output buffer
 * @param dir Directory (STATUS_DIR or RESULT_DIR)
 * @param scanid Scan ID
 * @return 0 on success, -1 when the ID is invalid or the path does not fit
 */
static int batch_build_path(char *out, size_t out_size, const char *dir, const char *scanid)
{
	if (!batch_scanid_is_valid(scanid))
		return -1;

	int written = snprintf(out, out_size, "%s/%s", dir, scanid);
	if (written < 0 || (size_t)written >= out_size)
		return -1;

	return 0;
}

/**
 * @brief Tells whether a status line reports the "completed" status.
 * @param line Status line as read from the status file
 * @return true when the "status" value starts with "completed"
 */
static bool batch_status_is_completed(const char *line)
{
	static const char key[] = "\"status\":\"";
	static const char done[] = "completed";

	const char *field = strstr(line, key);
	if (!field)
		return false;

	return strncmp(field + (sizeof(key) - 1), done, sizeof(done) - 1) == 0;
}

/**
 * @brief Extracts the "started" timestamp from a status line.
 * @param line Status line as read from the status file
 * @param started_out Receives the timestamp on success
 * @return true when a numeric timestamp was found
 */
static bool batch_status_started(const char *line, long *started_out)
{
	static const char key[] = "\"started\":";

	const char *field = strstr(line, key);
	if (!field)
		return false;
	field += sizeof(key) - 1;

	char *end = NULL;
	errno = 0;
	long value = strtol(field, &end, 10);
	if (end == field || errno == ERANGE)
		return false;

	*started_out = value;
	return true;
}

/**
 * @brief Extracts scan ID from file path (filename without extension)
 * The ID is the last path component with everything from its last '.' removed.
 * @param path File path
 * @return Allocated string with scan ID, caller must free. NULL when path is
 *         NULL or memory could not be allocated.
 */
static char* get_scanid_from_path(const char *path)
{
	if (!path)
		return NULL;

	/* basename() may modify its argument, so it gets a private copy */
	size_t path_size = strlen(path) + 1;
	char *path_copy = malloc(path_size);
	if (!path_copy)
		return NULL;
	memcpy(path_copy, path, path_size);

	const char *base = basename(path_copy);

	/* Keep everything before the last dot (the whole name when there is none) */
	const char *dot = strrchr(base, '.');
	size_t id_len = dot ? (size_t)(dot - base) : strlen(base);

	char *scanid = malloc(id_len + 1);
	if (scanid) {
		memcpy(scanid, base, id_len);
		scanid[id_len] = '\0';
	}

	free(path_copy);
	return scanid;
}

/**
 * @brief Setup output redirection to batch result file
 * On failure STDOUT is left untouched whenever the problem can be detected
 * beforehand, so a caller that ignores the return value can keep printing.
 * @param scanid Scan ID for this operation
 * @return 0 on success, -1 on error
 */
static int setup_batch_result_output(const char *scanid)
{
	if (!scanid)
		return -1;

	char result_path[BATCH_PATH_MAX];
	if (batch_build_path(result_path, sizeof(result_path), RESULT_DIR, scanid) != 0) {
		fprintf(stderr, "Error: Invalid scan ID for batch output\n");
		return -1;
	}

	/* Best effort: a real problem here is reported by the open below */
	mkdir("/tmp/engine", 0755);
	mkdir(RESULT_DIR, 0755);

	/* A failed freopen() closes the stream, so check that the file can be
	 * written before handing STDOUT over. */
	FILE *probe = fopen(result_path, "w");
	if (!probe) {
		fprintf(stderr, "Error: Could not redirect output to %s: %s\n", result_path, strerror(errno));
		return -1;
	}
	fclose(probe);

	/* Redirect stdout to file */
	if (freopen(result_path, "w", stdout) == NULL) {
		fprintf(stderr, "Error: Could not redirect output to %s: %s\n", result_path, strerror(errno));
		return -1;
	}

	return 0;
}

/**
 * @brief Show batch result file content to stdout
 * Verifies scan status before returning results: while the scan is not
 * completed the status line is printed instead of the result.
 * @param scanid Scan ID to retrieve
 * @return 0 on success, -1 on error
 */
static int show_batch_result(const char *scanid)
{
	if (!scanid) {
		fprintf(stderr, "Error: No scan ID provided\n");
		return -1;
	}

	/* First check the status file. An ID that cannot name a status file is
	 * reported exactly like a scan that does not exist. */
	char status_path[BATCH_PATH_MAX];
	FILE *status_fp = NULL;
	if (batch_build_path(status_path, sizeof(status_path), STATUS_DIR, scanid) == 0)
		status_fp = fopen(status_path, "r");
	if (!status_fp) {
		printf("{\"message\":\"Scan not found\"}\n");
		return -1;
	}

	char status_line[512] = "";
	bool have_status = fgets(status_line, sizeof(status_line), status_fp) != NULL;
	fclose(status_fp);

	/* An empty status file has nothing to report; never print the buffer
	 * unless fgets() actually filled it. */
	if (!have_status) {
		printf("{\"message\":\"Scan status not available\"}\n");
		return -1;
	}

	/* If not completed, return the status file content directly */
	if (!batch_status_is_completed(status_line)) {
		printf("%s", status_line);
		return 0;
	}

	/* Status is completed, return the result */
	char result_path[BATCH_PATH_MAX];
	FILE *fp = NULL;
	if (batch_build_path(result_path, sizeof(result_path), RESULT_DIR, scanid) == 0)
		fp = fopen(result_path, "r");
	if (!fp) {
		printf("{\"message\":\"Result file not found\"}\n");
		return -1;
	}

	int rc = 0;
	char chunk[4096];
	size_t got;
	while ((got = fread(chunk, 1, sizeof(chunk), fp)) > 0) {
		if (fwrite(chunk, 1, got, stdout) != got) {
			rc = -1;
			break;
		}
	}
	if (ferror(fp))
		rc = -1;

	fclose(fp);
	return rc;
}

/**
 * @brief Show batch status file content to stdout
 * Prints the first line of the status file, if it has one.
 * @param scanid Scan ID to retrieve
 * @return 0 on success, -1 on error
 */
static int show_batch_status(const char *scanid)
{
	if (!scanid) {
		fprintf(stderr, "Error: No scan ID provided\n");
		return -1;
	}

	char status_path[BATCH_PATH_MAX];
	if (batch_build_path(status_path, sizeof(status_path), STATUS_DIR, scanid) != 0) {
		fprintf(stderr, "Error: Invalid scan ID\n");
		return -1;
	}

	FILE *fp = fopen(status_path, "r");
	if (!fp) {
		fprintf(stderr, "Error: Status file not found: %s\n", status_path);
		return -1;
	}

	char line[512];
	if (fgets(line, sizeof(line), fp) != NULL)
		printf("%s", line);

	fclose(fp);
	return 0;
}

/**
 * @brief Clean old completed status files from the batch-scan directory
 * Removes files that have status "completed" and are older than 2 hours.
 * Entries starting with '.' and files without a readable status line or a
 * numeric "started" timestamp are left alone.
 * NOTE(review): only STATUS_DIR is cleaned; the matching files in RESULT_DIR
 * are not removed here.
 * @return Number of files cleaned
 */
int clean_old_status_files(void)
{
	DIR *dir = opendir(STATUS_DIR);
	if (!dir) {
		printf("Status directory does not exist: %s\n", STATUS_DIR);
		return 0;
	}

	const time_t now = time(NULL);
	int cleaned = 0;
	struct dirent *entry;

	while ((entry = readdir(dir)) != NULL) {
		if (entry->d_name[0] == '.')
			continue;

		char filepath[BATCH_PATH_MAX];
		if (batch_build_path(filepath, sizeof(filepath), STATUS_DIR, entry->d_name) != 0)
			continue;

		FILE *fp = fopen(filepath, "r");
		if (!fp)
			continue;

		/* Expected content: {"started":TIMESTAMP,"status":"STATUS","progress":NUM} */
		char line[512];
		bool have_line = fgets(line, sizeof(line), fp) != NULL;
		fclose(fp);
		if (!have_line)
			continue;

		long started = 0;
		if (!batch_status_is_completed(line) || !batch_status_started(line, &started))
			continue;

		/* Keep anything younger than MAX_AGE_SECONDS */
		if (difftime(now, (time_t)started) < MAX_AGE_SECONDS)
			continue;

		if (unlink(filepath) == 0) {
			cleaned++;
			printf("Cleaned: %s\n", entry->d_name);
		}
	}

	closedir(dir);
	printf("Total files cleaned: %d\n", cleaned);
	return cleaned;
}
mode is enabled */ + char *batch_scanid = NULL; + if (scan_report_progress) { + batch_scanid = get_scanid_from_path(target); + if (batch_scanid) { + setup_batch_result_output(batch_scanid); + } + } /* Open main report structure */ json_open(); @@ -560,7 +800,7 @@ int main(int argc, char **argv) /* Scan wfp file */ if (wfp_extension) wfp_scan(target, scan_max_snippets, scan_max_components, scan_adjust_tolerance, - scan_ranking_threshold, scan_min_match_hits, scan_min_match_lines, scan_range_tolerance, scan_honor_file_extension); + scan_ranking_threshold, scan_min_match_hits, scan_min_match_lines, scan_range_tolerance, scan_honor_file_extension, scan_report_progress); else if (bfp_extension) binary_scan(target); @@ -579,7 +819,8 @@ int main(int argc, char **argv) /* Close main report structure */ json_close(); - if (target) free (target); + if (target) free(target); + if (batch_scanid) free(batch_scanid); } if (ignore_components) diff --git a/src/scan.c b/src/scan.c index 33c85ec..35ed9e0 100644 --- a/src/scan.c +++ b/src/scan.c @@ -36,6 +36,15 @@ #include "hpsm.h" #include "match_list.h" #include "report.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include /** @file scan.c @@ -211,6 +220,74 @@ int hash_scan(char *path, int scan_max_snippets, int scan_max_components, bool a return EXIT_SUCCESS; } +/** + * @brief Extracts scan ID from file path (filename without extension) + * @param path File path + * @return Allocated string with scan ID, caller must free + */ +static char* extract_scanid_from_path(const char *path) +{ + if (!path) return NULL; + + /* Get basename */ + char *path_copy = strdup(path); + char *base = basename(path_copy); + + /* Remove extension */ + char *scanid = strdup(base); + char *dot = strrchr(scanid, '.'); + if (dot) *dot = '\0'; + + free(path_copy); + return scanid; +} + +/** + * @brief Writes progress JSON to status file + * @param status_path Path to the status file + * @param started Start timestamp + 
* @param status Status string ("scanning", "completed", "failed") + * @param progress Progress percentage (0-100) + */ +static void write_progress_to_file(const char *status_path, time_t started, const char *status, int progress) +{ + if (!status_path || !status) return; + + FILE *fp = fopen(status_path, "w"); + if (!fp) { + scanlog("Warning: Could not open status file %s: %s\n", status_path, strerror(errno)); + return; + } + + fprintf(fp, "{\"started\":%ld,\"status\":\"%s\",\"progress\":%d}\n", + started, status, progress); + fclose(fp); +} + +/** + * @brief Creates status file path and directories + * @param scanid Scan ID for this operation + * @param status_path_out Output buffer for status file path (must be at least PATH_MAX size) + * @return 0 on success, -1 on error + */ +static int create_status_file(const char *scanid, char *status_path_out) +{ + if (!scanid || !status_path_out) return -1; + + /* Create directories recursively */ + mkdir("/tmp/engine", 0755); + mkdir("/tmp/engine/batch-scan", 0755); + + /* Build status file path */ + snprintf(status_path_out, PATH_MAX, "/tmp/engine/batch-scan/%s", scanid); + + /* Remove old file/pipe if exists (avoids blocking on old FIFOs) */ + unlink(status_path_out); + + scanlog("Status file path: %s\n", status_path_out); + return 0; +} + /** * @brief Performs a wfp scan. * Files with wfp extension will be scanned in this mode. @@ -221,23 +298,62 @@ int hash_scan(char *path, int scan_max_snippets, int scan_max_components, bool a * @param scan_max_components Limit for component to be displayed. 1 by default. 
* @return EXIT_SUCCESS */ -int wfp_scan(char * path, int scan_max_snippets, int scan_max_components, bool adjust_tolerance, int component_ranking_threshold, int snippet_min_hits, int snippet_min_lines, int snippet_range_tolerance, bool snippet_honor_file_extension) +int wfp_scan(char * path, int scan_max_snippets, int scan_max_components, bool adjust_tolerance, int component_ranking_threshold, int snippet_min_hits, int snippet_min_lines, int snippet_range_tolerance, bool snippet_honor_file_extension, bool report_progress) { scan_data_t * scan = NULL; char * line = NULL; size_t len = 0; ssize_t lineln; uint8_t *rec = NULL; - + + /* Progress tracking variables */ + char *scanid = NULL; + char status_path[PATH_MAX] = {0}; + time_t started = time(NULL); + long total_lines = 0; + long current_line = 0; + int progress = 0; + bool status_enabled = false; + scanlog("--- WFP SCAN ---\n"); + + /* Create status file for progress reporting if enabled */ + if (report_progress) { + scanid = extract_scanid_from_path(path); + if (scanid && create_status_file(scanid, status_path) == 0) { + status_enabled = true; + scanlog("Scan ID: %s\n", scanid); + } + } + /* Open WFP file */ FILE *fp = fopen(path, "r"); if (fp == NULL) { fprintf(stdout, "E017 Cannot open target"); + if (status_enabled) { + write_progress_to_file(status_path, started, "failed", 0); + } + if (scanid) free(scanid); return EXIT_FAILURE; } + /* Count total lines first for progress calculation */ + scanlog("Counting total lines...\n"); + char *count_line = NULL; + size_t count_len = 0; + while (getline(&count_line, &count_len, fp) != -1) { + total_lines++; + } + if (count_line) free(count_line); + rewind(fp); + scanlog("Total lines: %ld\n", total_lines); + + /* Write initial progress */ + if (status_enabled) { + write_progress_to_file(status_path, started, "scanning", 0); + } + /* Get wfp MD5 hash */ uint8_t tmp_md5[16]; get_file_md5(path, tmp_md5); @@ -248,6 +364,17 @@ int wfp_scan(char * path, int scan_max_snippets, 
int scan_max_components, bool a { trim(line); + /* Update progress */ + current_line++; + if (status_enabled && total_lines > 0) { + int new_progress = (int)((current_line * 100) / total_lines); + /* Update pipe every 1% change to avoid excessive writes */ + if (new_progress != progress) { + progress = new_progress; + write_progress_to_file(status_path, started, "scanning", progress); + } + } + bool is_file = (memcmp(line, "file=", 5) == 0); bool is_fh2 = (memcmp(line, "fh2=", 4) == 0); bool is_hpsm = (memcmp(line, "hpsm=", 5) == 0); @@ -357,10 +484,16 @@ int wfp_scan(char * path, int scan_max_snippets, int scan_max_components, bool a /* Scan the last file */ ldb_scan(scan); + /* Report completion */ + if (status_enabled) { + write_progress_to_file(status_path, started, "completed", 100); + } + fclose(fp); if (line) free(line); - + free(tmp_md5_hex); + if (scanid) free(scanid); return EXIT_SUCCESS; }