11import os
22import re
3- from pygments import highlight
4- from pygments .lexers import get_lexer_for_filename
5- from pygments .token import Token
63
74
def count_inline_comments(file_path):
    """
    Count inline comments in a source code file.

    An inline comment is a comment that appears on the same line as code,
    not on a line by itself. Only single-line comment markers ('#' or '//')
    are recognised; block comments are not detected.

    Args:
        file_path (str): Path to the source code file

    Returns:
        int: Number of lines that contain an inline comment

    Raises:
        ValueError: If the file extension is not supported
        FileNotFoundError: If the file doesn't exist
    """
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File not found: {file_path}")

    _, ext = os.path.splitext(file_path)

    # Line-comment marker per (lower-case) file extension. These are plain
    # strings, not regular expressions.
    comment_patterns = {
        '.py': '#',
        '.js': '//',
        '.go': '//',
        '.rb': '#',
        '.java': '//',
        '.cpp': '//',
        '.c': '//',
        '.cs': '//',
        '.php': '//',
        '.swift': '//',
        '.kt': '//',
        '.scala': '//',
        '.rs': '//',
        '.ts': '//',
        '.jsx': '//',
        '.tsx': '//',
    }

    # Extensions are matched case-insensitively so 'Main.PY' is handled the
    # same way as 'main.py'.
    comment_marker = comment_patterns.get(ext.lower())
    if comment_marker is None:
        raise ValueError(f"Unsupported file extension: {ext}")

    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            content = f.read()
    except UnicodeDecodeError:
        # latin-1 maps every byte to a character, so this retry cannot fail
        # with a decode error.
        with open(file_path, 'r', encoding='latin-1') as f:
            content = f.read()

    if not content.strip():
        return 0

    return sum(
        1
        for line in content.splitlines()
        if _has_inline_comment(line, comment_marker)
    )
10171
10272
103- # Alternative simpler implementation using regex patterns
104- def count_inline_comments_regex (file_path ):
73+ def _has_inline_comment (line , comment_marker ):
10574 """
106- Alternative implementation using regex patterns for comment detection.
107- This is simpler but less accurate than the Pygments-based approach.
75+ Check if a line has an inline comment (comment on same line as code).
76+
77+ Args:
78+ line (str): The line to check
79+ comment_marker (str): The comment marker for the language (e.g., '//', '#')
80+
81+ Returns:
82+ bool: True if the line has an inline comment, False otherwise
10883 """
109- if not os . path . exists ( file_path ):
110- raise FileNotFoundError ( f"File not found: { file_path } " )
84+ # Remove leading/trailing whitespace
85+ line = line . strip ( )
11186
112- # Get file extension
113- _ , ext = os .path .splitext (file_path )
87+ # Empty line or line with only whitespace
88+ if not line :
89+ return False
11490
115- # Define comment patterns for different languages
116- comment_patterns = {
117- '.py' : r'#.*' ,
118- '.js' : r'//.*' ,
119- '.go' : r'//.*' ,
120- '.rb' : r'#.*' ,
121- '.java' : r'//.*' ,
122- '.cpp' : r'//.*' ,
123- '.c' : r'//.*' ,
124- '.cs' : r'//.*' ,
125- '.php' : r'//.*' ,
126- '.swift' : r'//.*' ,
127- '.kt' : r'//.*' ,
128- '.scala' : r'//.*' ,
129- }
91+ # Line starts with comment marker (full-line comment)
92+ if line .startswith (comment_marker ):
93+ return False
13094
131- if ext not in comment_patterns :
132- raise ValueError ( f"Unsupported file extension: { ext } " )
95+ # Find comment marker in the line
96+ comment_index = line . find ( comment_marker )
13397
134- comment_pattern = comment_patterns [ext ]
98+ # No comment marker found
99+ if comment_index == - 1 :
100+ return False
135101
136- try :
137- with open (file_path , 'r' , encoding = 'utf-8' ) as f :
138- lines = f .readlines ()
139- except UnicodeDecodeError :
140- with open (file_path , 'r' , encoding = 'latin-1' ) as f :
141- lines = f .readlines ()
102+ # Check if there's non-whitespace code before the comment
103+ code_before_comment = line [:comment_index ].strip ()
142104
143- inline_comment_count = 0
105+ # Handle string literals that might contain comment markers
106+ if _is_comment_in_string (line , comment_index ):
107+ return False
144108
145- for line in lines :
146- line = line .strip ()
147- if not line :
148- continue
149-
150- # Find comment in the line
151- comment_match = re .search (comment_pattern , line )
152- if comment_match :
153- # Check if there's code before the comment
154- code_before_comment = line [:comment_match .start ()].strip ()
155- if code_before_comment :
156- inline_comment_count += 1
157-
158- return inline_comment_count
109+ # If there's code before the comment, it's an inline comment
110+ return bool (code_before_comment )
111+
112+
113+ def _is_comment_in_string (line , comment_index ):
114+ """
115+ Check if the comment marker is inside a string literal.
116+ This is a simplified check that handles basic cases.
117+
118+ Args:
119+ line (str): The line to check
120+ comment_index (int): Index of the comment marker
121+
122+ Returns:
123+ bool: True if the comment marker is likely inside a string
124+ """
125+ # Count quotes before the comment marker
126+ line_before_comment = line [:comment_index ]
127+
128+ # Count single and double quotes (basic check)
129+ single_quotes = line_before_comment .count ("'" )
130+ double_quotes = line_before_comment .count ('"' )
131+
132+ # Simple heuristic: if odd number of quotes, we're likely inside a string
133+ # This is not perfect but handles most common cases
134+ in_single_quote_string = single_quotes % 2 == 1
135+ in_double_quote_string = double_quotes % 2 == 1
136+
137+ return in_single_quote_string or in_double_quote_string
138+
139+
140+ # More robust string detection (optional, more complex)
141+ def _is_comment_in_string_robust (line , comment_index ):
142+ """
143+ More robust check for comment markers inside strings.
144+ Handles escaped quotes and mixed quote types.
145+ """
146+ i = 0
147+ in_single_string = False
148+ in_double_string = False
149+
150+ while i < comment_index :
151+ char = line [i ]
152+
153+ if char == '"' and not in_single_string :
154+ # Check if it's escaped
155+ if i == 0 or line [i - 1 ] != '\\ ' :
156+ in_double_string = not in_double_string
157+ elif char == "'" and not in_double_string :
158+ # Check if it's escaped
159+ if i == 0 or line [i - 1 ] != '\\ ' :
160+ in_single_string = not in_single_string
161+
162+ i += 1
163+
164+ return in_single_string or in_double_string
0 commit comments