Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion inc/scan.h
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ void scan_data_free (scan_data_t * scan);

void ldb_scan(scan_data_t * scan);
match_t ldb_scan_snippets(scan_data_t *scan_ptr);
int wfp_scan(char * path, int scan_max_snippets, int scan_max_components, bool adjust_tolerance, int component_ranking_threshold, int snippet_min_hits, int snippet_min_lines, int snippet_range_tolerance, bool snippet_honor_file_extension);
int wfp_scan(char * path, int scan_max_snippets, int scan_max_components, bool adjust_tolerance, int component_ranking_threshold, int snippet_min_hits, int snippet_min_lines, int snippet_range_tolerance, bool snippet_honor_file_extension, bool report_progress);
int hash_scan(char *path, int scan_max_snippets, int scan_max_components, bool adjust_tolerance, int component_ranking_threshold, int snippet_min_hits, int snippet_min_lines, int snippet_range_tolerance, bool snippet_honor_file_extension);

#endif
7 changes: 7 additions & 0 deletions src/help.c
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,13 @@ Options:\n\
-d, --debug Store debugging information to disk (/tmp).\n\
-q, --quiet Suppress JSON output (show only debugging info via STDERR).\n\
\n\
Batch Processing:\n\
-R, --report Enable batch mode: write progress to /tmp/engine/batch-scan/<scanid>\n\
and results to /tmp/engine/batch-result/<scanid> instead of STDOUT.\n\
-C, --clean Clean completed status files older than 2 hours from batch-scan directory.\n\
--batch-status ID Show scan progress/status for the given scan ID.\n\
--batch-result ID Show scan results for the given scan ID (only if completed).\n\
\n\
Environment variables:\n\
SCANOSS_MATCHMAP_MAX: Set the snippet scanning match map size (default: %d).\n\
SCANOSS_FILE_CONTENTS_URL: Define the API URL endpoint for sources. Source URL won't be reported if not defined.\n\
Expand Down
251 changes: 246 additions & 5 deletions src/main.c
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,9 @@
#include "hpsm.h"
#include <dlfcn.h>
#include <getopt.h>
#include <errno.h>
#include <libgen.h>
#include <sys/stat.h>

struct ldb_table oss_url;
struct ldb_table oss_file;
Expand All @@ -70,6 +73,7 @@ int scan_range_tolerance = SNIPPETS_DEFAULT_RANGE_TOLERANCE; // Maximum number o
bool scan_adjust_tolerance = SNIPPETS_DEFAULT_ADJUST_TOLERANCE; /** Adjust tolerance based on file size */
int scan_ranking_threshold = -1; //disable by defaults
bool scan_honor_file_extension = SNIPPETS_DEFAULT_HONOR_FILE_EXTENSION;
bool scan_report_progress = false;

bool lib_encoder_present = false;
#define LDB_VER_MIN "4.1.0"
Expand Down Expand Up @@ -216,7 +220,7 @@ void recurse_directory(char *name)

if (wfp)
wfp_scan(path, scan_max_snippets, scan_max_components, scan_adjust_tolerance,
scan_ranking_threshold, scan_min_match_hits, scan_min_match_lines, scan_range_tolerance, scan_honor_file_extension);
scan_ranking_threshold, scan_min_match_hits, scan_min_match_lines, scan_range_tolerance, scan_honor_file_extension, scan_report_progress);
else
{
scan_data_t * scan = scan_data_init(path, scan_max_snippets, scan_max_components, scan_adjust_tolerance,
Expand Down Expand Up @@ -248,6 +252,206 @@ bool validate_alpha(char *txt)
return true;
}

/* Directory holding one status file per scan ID; read by the batch status, result and clean helpers. */
#define STATUS_DIR "/tmp/engine/batch-scan"
/* Directory holding one result file per scan ID; stdout is redirected into it in batch mode. */
#define RESULT_DIR "/tmp/engine/batch-result"
/* Minimum age, in seconds, before a completed status file is eligible for cleanup. */
#define MAX_AGE_SECONDS (2 * 60 * 60) /* 2 hours */

/**
* @brief Extracts scan ID from file path (filename without extension)
* @param path File path
* @return Allocated string with scan ID, caller must free
*/
static char* get_scanid_from_path(const char *path)
{
if (!path) return NULL;

/* Get basename */
char *path_copy = strdup(path);
char *base = basename(path_copy);

/* Remove extension */
char *scanid = strdup(base);
char *dot = strrchr(scanid, '.');
if (dot) *dot = '\0';

free(path_copy);
return scanid;
Comment on lines +269 to +278
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor

Missing NULL check after strdup (static analysis: nullPointerOutOfMemory).

If strdup(path) on line 269 returns NULL (OOM), basename(path_copy) on line 270 dereferences it. Same for strdup(base) on line 273—if it fails, strrchr(scanid, '.') on line 274 dereferences NULL.

Proposed fix
 	char *path_copy = strdup(path);
+	if (!path_copy) return NULL;
 	char *base = basename(path_copy);
 
 	/* Remove extension */
 	char *scanid = strdup(base);
+	if (!scanid) {
+		free(path_copy);
+		return NULL;
+	}
 	char *dot = strrchr(scanid, '.');
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
char *path_copy = strdup(path);
char *base = basename(path_copy);
/* Remove extension */
char *scanid = strdup(base);
char *dot = strrchr(scanid, '.');
if (dot) *dot = '\0';
free(path_copy);
return scanid;
char *path_copy = strdup(path);
if (!path_copy) return NULL;
char *base = basename(path_copy);
/* Remove extension */
char *scanid = strdup(base);
if (!scanid) {
free(path_copy);
return NULL;
}
char *dot = strrchr(scanid, '.');
if (dot) *dot = '\0';
free(path_copy);
return scanid;
🧰 Tools
🪛 Cppcheck (2.19.0)

[warning] 274-274: If memory allocation fails, then there is a possible null pointer dereference

(nullPointerOutOfMemory)

🤖 Prompt for AI Agents
In `@src/main.c` around lines 269 - 278, The code must check for allocation
failures: after calling strdup(path) assign to path_copy and if it's NULL,
return NULL (or propagate error) without calling basename; after strdup(base)
assign to scanid and if it's NULL, free path_copy and return NULL; only call
basename and strrchr when their inputs are non-NULL; ensure any allocated memory
(path_copy) is freed on all error paths before returning and the function
returns a clear error indicator (e.g., NULL) when strdup fails.

}
Comment on lines +264 to +279
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🔴 Critical

Path traversal vulnerability: user-supplied scan IDs are used unsanitized in file paths.

--batch-result ID and --batch-status ID pass optarg directly to show_batch_result / show_batch_status, which concatenate it into a path like /tmp/engine/batch-scan/<ID>. A malicious user can supply ../../etc/passwd to read arbitrary files.

get_scanid_from_path (used for the -R flow) strips to a basename, but the --batch-result / --batch-status paths bypass this entirely and use raw user input.

Add input validation to reject scan IDs containing /, .., or any character other than alphanumerics, hyphens, and underscores.

🛡️ Proposed fix: add a validation helper
+/**
+ * `@brief` Validates a scan ID contains only safe characters
+ * `@param` scanid Scan ID to validate
+ * `@return` true if valid, false otherwise
+ */
+static bool is_valid_scanid(const char *scanid)
+{
+	if (!scanid || *scanid == '\0') return false;
+	for (const char *p = scanid; *p; p++) {
+		if (!isalnum(*p) && *p != '-' && *p != '_') return false;
+	}
+	return true;
+}

Then guard usage:

 		case 262: /* --batch-result */
-			if (show_batch_result(optarg) == 0)
+			if (!is_valid_scanid(optarg)) {
+				fprintf(stderr, "Error: Invalid scan ID\n");
+				exit(EXIT_FAILURE);
+			}
+			if (show_batch_result(optarg) == 0)
 				exit(EXIT_SUCCESS);

Apply the same guard for case 263 (--batch-status).

Also applies to: 313-365, 372-395

🧰 Tools
🪛 Cppcheck (2.19.0)

[warning] 274-274: If memory allocation fails, then there is a possible null pointer dereference

(nullPointerOutOfMemory)

🤖 Prompt for AI Agents
In `@src/main.c` around lines 264 - 279, The code is vulnerable to path traversal
because user-supplied IDs passed into show_batch_result and show_batch_status
are concatenated into file paths without sanitization; add a validation helper
(e.g., is_valid_scanid) that returns false for any scan id containing '/' or
".." or any character outside [A-Za-z0-9-], and use it to reject/return an error
before calling show_batch_result, show_batch_status (and any other places that
accept raw IDs, including the -R flow where get_scanid_from_path is used) so
only validated IDs are used to build file paths.


/**
 * @brief Redirect stdout to the batch result file of a scan
 *
 * Creates /tmp/engine and RESULT_DIR when missing, then reopens stdout on
 * RESULT_DIR/<scanid> in write mode. The redirection is not undone here, so
 * everything written to stdout afterwards ends up in the result file.
 *
 * @param scanid Scan ID for this operation; used as the result file name
 * @return 0 on success, -1 on error (NULL scanid, path does not fit in
 *         MAX_PATH, or the result file could not be opened)
 */
static int setup_batch_result_output(const char *scanid)
{
	if (!scanid) return -1;

	/* Best-effort: the directories normally exist already. Any real problem
	 * surfaces below when the result file cannot be opened. */
	mkdir("/tmp/engine", 0755);
	mkdir(RESULT_DIR, 0755);

	/* Build result file path; refuse a truncated path, which would silently
	 * store the results under a different name */
	char result_path[MAX_PATH];
	int len = snprintf(result_path, sizeof(result_path), "%s/%s", RESULT_DIR, scanid);
	if (len < 0 || (size_t)len >= sizeof(result_path)) {
		fprintf(stderr, "Error: Result path too long for scan ID: %s\n", scanid);
		return -1;
	}

	/* Redirect stdout to file */
	if (freopen(result_path, "w", stdout) == NULL) {
		fprintf(stderr, "Error: Could not redirect output to %s: %s\n", result_path, strerror(errno));
		return -1;
	}

	return 0;
}
Comment on lines +286 to +305
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

freopen on stdout is never restored — downstream output (e.g., cleanup messages, error paths) silently goes to the result file.

After setup_batch_result_output, all printf/fprintf(stdout, ...) calls, including potential error messages, will be written to the batch result file. If an error occurs after this point, the user sees nothing on the terminal. Consider using a separate FILE* for result output instead of hijacking stdout, or at minimum document this intentional behavior and ensure all error paths use stderr.

🤖 Prompt for AI Agents
In `@src/main.c` around lines 286 - 305, setup_batch_result_output currently
hijacks stdout via freopen(result_path, "w", stdout) which causes all subsequent
stdout output (including later informational or error messages) to be written
into the result file; instead open the result file with fopen into a dedicated
FILE* (e.g., result_fp) rather than calling freopen, update callers or make
result_fp accessible (or return it) so result output goes to that FILE*, and
ensure you fclose(result_fp) in cleanup; if you must redirect stdout, capture
and restore the original stdout file descriptor using dup/dup2 and provide a
corresponding restore function, but prefer the simpler fix: replace freopen
usage in setup_batch_result_output with fopen into a new result FILE* and adjust
code paths to write results to result_fp while leaving stdout/stderr unchanged
(refer to function setup_batch_result_output and any cleanup path that should
close the file).


/**
 * @brief Show batch result file content to stdout
 *
 * Reads the first line of STATUS_DIR/<scanid>. When that line does not report
 * the status "completed", the line itself is printed instead of the results.
 * Otherwise the content of RESULT_DIR/<scanid> is copied to stdout.
 *
 * @param scanid Scan ID to retrieve; must be a plain file name (non-empty,
 *               no '/', not "." or "..")
 * @return 0 on success (results or pending status printed), -1 on error
 *         (missing/invalid scan ID, status or result file not found, path
 *         too long, or write failure)
 */
static int show_batch_result(const char *scanid)
{
	static const char status_key[] = "\"status\":\"";
	static const char completed[] = "completed";

	if (!scanid) {
		fprintf(stderr, "Error: No scan ID provided\n");
		return -1;
	}

	/* The ID becomes a file name under STATUS_DIR and RESULT_DIR: refuse
	 * anything that could resolve outside of those directories */
	if (scanid[0] == '\0' || strchr(scanid, '/') != NULL ||
	    strcmp(scanid, ".") == 0 || strcmp(scanid, "..") == 0) {
		fprintf(stderr, "Error: Invalid scan ID\n");
		return -1;
	}

	/* First check the status file */
	char status_path[MAX_PATH];
	int len = snprintf(status_path, sizeof(status_path), "%s/%s", STATUS_DIR, scanid);
	if (len < 0 || (size_t)len >= sizeof(status_path)) {
		fprintf(stderr, "Error: Invalid scan ID\n");
		return -1;
	}

	FILE *status_fp = fopen(status_path, "r");
	if (!status_fp) {
		printf("{\"message\":\"Scan not found\"}\n");
		return -1;
	}

	/* Start from an empty string so an empty or unreadable status file is
	 * printed as nothing instead of uninitialized memory */
	char status_buffer[512] = "";
	bool is_completed = false;

	if (fgets(status_buffer, sizeof(status_buffer), status_fp) != NULL) {
		const char *status_ptr = strstr(status_buffer, status_key);
		if (status_ptr &&
		    strncmp(status_ptr + (sizeof(status_key) - 1), completed, sizeof(completed) - 1) == 0) {
			is_completed = true;
		}
	}
	fclose(status_fp);

	/* If not completed, return the status file content directly */
	if (!is_completed) {
		printf("%s", status_buffer);
		return 0;
	}

	/* Status is completed, return the result */
	char result_path[MAX_PATH];
	len = snprintf(result_path, sizeof(result_path), "%s/%s", RESULT_DIR, scanid);
	if (len < 0 || (size_t)len >= sizeof(result_path)) {
		fprintf(stderr, "Error: Invalid scan ID\n");
		return -1;
	}

	FILE *fp = fopen(result_path, "r");
	if (!fp) {
		printf("{\"message\":\"Result file not found\"}\n");
		return -1;
	}

	int rc = 0;
	char buffer[4096];
	size_t bytes;
	while ((bytes = fread(buffer, 1, sizeof(buffer), fp)) > 0) {
		/* A short write means stdout is broken; stop instead of looping on */
		if (fwrite(buffer, 1, bytes, stdout) != bytes) {
			rc = -1;
			break;
		}
	}

	fclose(fp);
	return rc;
}

/**
 * @brief Show batch status file content to stdout
 *
 * Prints the first line of STATUS_DIR/<scanid>. An empty status file prints
 * nothing and still counts as success.
 *
 * @param scanid Scan ID to retrieve; must be a plain file name (non-empty,
 *               no '/', not "." or "..")
 * @return 0 on success, -1 on error (missing/invalid scan ID, path too long,
 *         or status file not found)
 */
static int show_batch_status(const char *scanid)
{
	if (!scanid) {
		fprintf(stderr, "Error: No scan ID provided\n");
		return -1;
	}

	/* The ID becomes a file name under STATUS_DIR: refuse anything that
	 * could resolve outside of that directory */
	if (scanid[0] == '\0' || strchr(scanid, '/') != NULL ||
	    strcmp(scanid, ".") == 0 || strcmp(scanid, "..") == 0) {
		fprintf(stderr, "Error: Invalid scan ID\n");
		return -1;
	}

	char status_path[MAX_PATH];
	int len = snprintf(status_path, sizeof(status_path), "%s/%s", STATUS_DIR, scanid);
	if (len < 0 || (size_t)len >= sizeof(status_path)) {
		fprintf(stderr, "Error: Invalid scan ID\n");
		return -1;
	}

	FILE *fp = fopen(status_path, "r");
	if (!fp) {
		fprintf(stderr, "Error: Status file not found: %s\n", status_path);
		return -1;
	}

	/* Only the first line is reported */
	char buffer[512];
	if (fgets(buffer, sizeof(buffer), fp) != NULL) {
		printf("%s", buffer);
	}

	fclose(fp);
	return 0;
}

/**
 * @brief Clean old completed status files from the batch-scan directory
 *
 * Walks STATUS_DIR, skipping entries whose name starts with '.'. A file is
 * removed when its first line reports the status "completed" and carries a
 * parseable "started" timestamp at least MAX_AGE_SECONDS in the past. Files
 * with a missing or malformed timestamp are left untouched.
 *
 * NOTE(review): only the status file is removed; the file of the same name in
 * RESULT_DIR is left in place.
 *
 * @return Number of files cleaned (0 when STATUS_DIR cannot be opened)
 */
int clean_old_status_files(void)
{
	/* Expected first line: {"started":TIMESTAMP,"status":"STATUS","progress":NUM} */
	static const char status_key[] = "\"status\":\"";
	static const char started_key[] = "\"started\":";
	static const char completed[] = "completed";

	DIR *dir = opendir(STATUS_DIR);
	if (!dir) {
		printf("Status directory does not exist: %s\n", STATUS_DIR);
		return 0;
	}

	struct dirent *entry;
	int cleaned = 0;
	time_t now = time(NULL);

	while ((entry = readdir(dir)) != NULL) {
		if (entry->d_name[0] == '.') continue;

		/* Never open or unlink a truncated path: it could name another file */
		char filepath[MAX_PATH];
		int len = snprintf(filepath, sizeof(filepath), "%s/%s", STATUS_DIR, entry->d_name);
		if (len < 0 || (size_t)len >= sizeof(filepath)) continue;

		FILE *fp = fopen(filepath, "r");
		if (!fp) continue;

		char buffer[512];
		bool expired = false;

		if (fgets(buffer, sizeof(buffer), fp) != NULL) {
			const char *status_ptr = strstr(buffer, status_key);
			const char *started_ptr = strstr(buffer, started_key);

			if (status_ptr && started_ptr &&
			    strncmp(status_ptr + (sizeof(status_key) - 1), completed, sizeof(completed) - 1) == 0) {
				/* Extract timestamp; strtol reports "no digits" and overflow,
				 * so a malformed value is not mistaken for epoch 0 */
				const char *digits = started_ptr + (sizeof(started_key) - 1);
				char *end = NULL;
				errno = 0;
				long started = strtol(digits, &end, 10);

				if (end != digits && errno != ERANGE) {
					expired = (now - (time_t)started) >= MAX_AGE_SECONDS;
				}
			}
		}

		/* Close before unlinking so there is a single close per file */
		fclose(fp);

		if (expired && unlink(filepath) == 0) {
			cleaned++;
			printf("Cleaned: %s\n", entry->d_name);
		}
	}

	closedir(dir);
	printf("Total files cleaned: %d\n", cleaned);
	return cleaned;
}
Comment on lines +402 to +453
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor

clean_old_status_files does not remove the corresponding result files.

When a completed status file is cleaned, the corresponding result file in RESULT_DIR is left behind. Over time, orphaned result files will accumulate in /tmp/engine/batch-result/. Consider also removing the matching result file when cleaning a status file.

Proposed addition inside the cleanup block
 					if (unlink(filepath) == 0) {
 						cleaned++;
 						printf("Cleaned: %s\n", entry->d_name);
+						/* Also remove the corresponding result file */
+						char result_path[MAX_PATH];
+						snprintf(result_path, sizeof(result_path), "%s/%s", RESULT_DIR, entry->d_name);
+						unlink(result_path);
 					}
🤖 Prompt for AI Agents
In `@src/main.c` around lines 402 - 453, clean_old_status_files currently unlinks
only the status file; update it to also remove the matching result file in
RESULT_DIR when a status file is deleted. Inside the block where
unlink(filepath) succeeds (use the same entry->d_name), build a result path
(e.g., snprintf(resultpath, sizeof(resultpath), "%s/%s", RESULT_DIR,
entry->d_name)), attempt unlink(resultpath) if it exists, and log
success/failure (e.g., printf("Removed result: %s\n", entry->d_name) or an
error). Use the existing symbols filepath, entry->d_name, RESULT_DIR, cleaned
and ensure you only increment cleaned for the cleaned status file (leave error
handling and fclose/closedir as currently implemented).


/**
* @brief Read flags from /etc/scanoss_flags.cfg
* @return //TODO
Expand Down Expand Up @@ -299,6 +503,10 @@ static struct option long_options[] = {
{"debug", no_argument, 0, 'd'},
{"quiet", no_argument, 0, 'q'},
{"hpsm", no_argument, 0, 'H'},
{"report", no_argument, 0, 'R'},
{"clean", no_argument, 0, 'C'},
{"batch-result", required_argument, 0, 262},
{"batch-status", required_argument, 0, 263},
{0, 0, 0, 0}
};

Expand All @@ -325,7 +533,7 @@ int main(int argc, char **argv)

bool force_wfp = false;
bool force_bfp = false;

microseconds_start = microseconds_now();

/* Parse arguments */
Expand All @@ -334,7 +542,7 @@ int main(int argc, char **argv)
bool invalid_argument = false;
char * ldb_db_name = NULL;

while ((option = getopt_long(argc, argv, ":r:T:s:b:c:k:a:F:l:n:M:N:wtLvhdqH", long_options, &option_index)) != -1)
while ((option = getopt_long(argc, argv, ":r:T:s:b:c:k:a:F:l:n:M:N:wtLvhdqHRC", long_options, &option_index)) != -1)
{
/* Check valid alpha is entered */
if (optarg)
Expand Down Expand Up @@ -487,6 +695,30 @@ int main(int argc, char **argv)
exit(EXIT_FAILURE);
}
break;

case 'R':
scan_report_progress = true;
scanlog("Progress reporting enabled\n");
break;

case 'C':
clean_old_status_files();
exit(EXIT_SUCCESS);
break;

case 262: /* --batch-result */
if (show_batch_result(optarg) == 0)
exit(EXIT_SUCCESS);
else
exit(EXIT_FAILURE);
break;

case 263: /* --batch-status */
if (show_batch_status(optarg) == 0)
exit(EXIT_SUCCESS);
else
exit(EXIT_FAILURE);
break;
}
if (invalid_argument) break;
}
Expand Down Expand Up @@ -533,6 +765,14 @@ int main(int argc, char **argv)
strcpy (target, argv[argc-1]);
for (int i=strlen(target)-1; i>=0; i--) if (target[i]=='/') target[i]=0; else break;

/* Redirect output to batch result file if report mode is enabled */
char *batch_scanid = NULL;
if (scan_report_progress) {
batch_scanid = get_scanid_from_path(target);
if (batch_scanid) {
setup_batch_result_output(batch_scanid);
}
}
Comment on lines +768 to +775
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor

Batch output setup silently skipped if scanid extraction fails.

If get_scanid_from_path(target) returns NULL (e.g., the path has no usable filename), scan_report_progress remains true but no redirection occurs. Output goes to stdout while src/scan.c still creates status files and writes progress. Consider logging a warning or falling back gracefully.

🤖 Prompt for AI Agents
In `@src/main.c` around lines 768 - 775, When scan_report_progress is true but
get_scanid_from_path(target) returns NULL, the batch redirection is silently
skipped; change the logic around batch_scanid in main.c to detect a NULL return
from get_scanid_from_path(target) and either call setup_batch_result_output with
a safe fallback ID or emit a warning via the existing logging mechanism (e.g.,
log_warn/log_error) so the user knows redirection failed; update the branch that
currently only calls setup_batch_result_output(batch_scanid) to handle the NULL
case (using batch_scanid, get_scanid_from_path, setup_batch_result_output and
the same scan progress flow in src/scan.c) and ensure behavior is consistent
with how status files are created so output doesn’t silently stay on stdout.


/* Open main report structure */
json_open();
Expand Down Expand Up @@ -560,7 +800,7 @@ int main(int argc, char **argv)
/* Scan wfp file */
if (wfp_extension)
wfp_scan(target, scan_max_snippets, scan_max_components, scan_adjust_tolerance,
scan_ranking_threshold, scan_min_match_hits, scan_min_match_lines, scan_range_tolerance, scan_honor_file_extension);
scan_ranking_threshold, scan_min_match_hits, scan_min_match_lines, scan_range_tolerance, scan_honor_file_extension, scan_report_progress);

else if (bfp_extension)
binary_scan(target);
Expand All @@ -579,7 +819,8 @@ int main(int argc, char **argv)
/* Close main report structure */
json_close();

if (target) free (target);
if (target) free(target);
if (batch_scanid) free(batch_scanid);
}

if (ignore_components)
Expand Down
Loading
Loading