-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathfeedback.py
More file actions
3091 lines (2551 loc) · 126 KB
/
feedback.py
File metadata and controls
3091 lines (2551 loc) · 126 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
import os
import re
import glob
import time
import json
import argparse
import subprocess
import traceback
import xml.etree.ElementTree as ET
from collections import defaultdict
import pandas as pd
import requests
from requests.adapters import HTTPAdapter
from requests.packages.urllib3.util.retry import Retry
import logging
import random
import anthropic
import statistics
# Set up logging: mirror everything at INFO+ to both a log file and the console.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    handlers=[
        logging.FileHandler("test_generator.log"),
        logging.StreamHandler()
    ]
)
# Module-wide logger used by every function below.
logger = logging.getLogger("test_generator")
# API settings
# OpenAI (ChatGPT): key, base URL and model are all overridable via environment variables.
OPENAI_API_KEY = os.environ.get('OPENAI_API_KEY')
API_BASE = os.environ.get("OPENAI_API_BASE", "https://api.openai.com/v1")
DEFAULT_MODEL = os.environ.get("OPENAI_MODEL", "gpt-4o")
DEFAULT_MAX_TOKENS = 4096
DEFAULT_TEMPERATURE = 0.7
# Anthropic (Claude): only the key comes from the environment; the rest are pinned.
ANTHROPIC_API_KEY = os.environ.get('ANTHROPIC_API_KEY')
ANTHROPIC_API_BASE = "https://api.anthropic.com/v1/"
ANTHROPIC_DEFAULT_MODEL = "claude-3-5-sonnet-20241022"
ANTHROPIC_DEFAULT_MAX_TOKENS = 8192
ANTHROPIC_DEFAULT_TEMPERATURE = 0.7
# DeepSeek API settings
DEEPSEEK_API_KEY = os.environ.get('DEEPSEEK_API_KEY')
DEEPSEEK_API_BASE = "https://api.deepseek.com/v1"
DEEPSEEK_DEFAULT_MODEL = "deepseek-coder"
DEEPSEEK_DEFAULT_MAX_TOKENS = 8192
DEEPSEEK_DEFAULT_TEMPERATURE = 0.7
# Global metrics tracking variables
# Shared mutable state updated by every call_*_api invocation; reset with
# reset_llm_metrics() and read back via get_llm_metrics_summary().
llm_metrics = {
    "request_count": 0,    # number of API calls in the current window
    "token_sizes": [],     # estimated prompt token sizes, one per call
    "start_time": None,    # set on the first call (or explicit reset)
    "end_time": None,      # set when a summary is produced
    "request_times": [],   # per-request wall-clock durations (seconds)
}
def reset_llm_metrics():
    """Start a fresh metrics-collection window for LLM API calls."""
    global llm_metrics
    # Rebind (rather than mutate in place) so a brand-new window begins now.
    now = time.time()
    llm_metrics = {
        "request_count": 0,
        "token_sizes": [],
        "start_time": now,
        "end_time": None,
        "request_times": [],
    }
def get_llm_metrics_summary():
    """
    Summarize the LLM metrics collected during the current run.

    Returns:
        dict: request count, token-size statistics, total elapsed wall time,
        and the average per-request latency. All numeric fields are 0 when
        no requests have been recorded yet.
    """
    global llm_metrics
    # Close the measurement window.
    llm_metrics["end_time"] = time.time()
    # BUG FIX: start_time stays None until the first API call; the original
    # unconditional subtraction raised a TypeError in that case.
    start_time = llm_metrics["start_time"]
    total_time = (llm_metrics["end_time"] - start_time) if start_time is not None else 0.0
    token_sizes = llm_metrics["token_sizes"]
    request_times = llm_metrics["request_times"]
    summary = {
        "total_requests": llm_metrics["request_count"],
        "max_token_size": max(token_sizes) if token_sizes else 0,
        "min_token_size": min(token_sizes) if token_sizes else 0,
        "avg_token_size": statistics.mean(token_sizes) if token_sizes else 0,
        "total_time_seconds": total_time,
        "total_time_minutes": total_time / 60,
        "avg_request_time": statistics.mean(request_times) if request_times else 0
    }
    return summary
def _estimate_token_size(text):
"""Estimate token size based on a simple heuristic."""
# A rough approximation: 1 token is about 4 characters for English text
return len(text) // 4
# ChatGPT API Call
def create_session():
    """Build a requests.Session that transparently retries transient HTTP failures."""
    # Up to 3 retries with exponential backoff on rate-limit and 5xx responses.
    retry_policy = Retry(total=3, backoff_factor=0.5, status_forcelist=[429, 500, 502, 503, 504])
    http_session = requests.Session()
    http_session.mount('https://', HTTPAdapter(max_retries=retry_policy))
    return http_session
def call_gpt_api(prompt, model=DEFAULT_MODEL, max_tokens=DEFAULT_MAX_TOKENS, temperature=DEFAULT_TEMPERATURE):
    """
    Call the GPT API with the provided prompt.
    Parameters:
        prompt (str): The prompt to send to the API
        model (str): Model to use, defaults to DEFAULT_MODEL
        max_tokens (int): Maximum tokens in response, defaults to DEFAULT_MAX_TOKENS
        temperature (float): Temperature for response generation, defaults to DEFAULT_TEMPERATURE
    Returns:
        str: The model's response, "" when retries are exhausted, or an
        "ERROR: ..." string for fatal failures (e.g. missing API key)
    """
    global llm_metrics
    # Initialize metrics if this is the first call
    if llm_metrics["start_time"] is None:
        reset_llm_metrics()
    # Track number of requests and the (estimated) prompt size
    llm_metrics["request_count"] += 1
    llm_metrics["token_sizes"].append(_estimate_token_size(prompt))
    # Track time for this request
    request_start = time.time()
    try:
        if not OPENAI_API_KEY:
            raise ValueError("OpenAI API key not set. Please set OPENAI_API_KEY environment variable.")
        session = create_session()
        headers = {
            "Authorization": f"Bearer {OPENAI_API_KEY}",
            "Content-Type": "application/json"
        }
        data = {
            "model": model,
            "messages": [{"role": "user", "content": prompt}],
            "max_tokens": max_tokens,
            "temperature": temperature
        }
        logger.info(f"Calling API: model={model}, max_tokens={max_tokens}")
        # Add retry and delay mechanism (on top of the session-level Retry adapter)
        retries = 3
        backoff = 5  # Initial wait time in seconds
        for attempt in range(retries):
            try:
                response = session.post(
                    f"{API_BASE}/chat/completions",
                    headers=headers,
                    json=data,
                    timeout=120  # Increase timeout, API may need more time to process
                )
                response.raise_for_status()
                response_json = response.json()
                if "choices" in response_json and len(response_json["choices"]) > 0:
                    return response_json["choices"][0]["message"]["content"]
                else:
                    logger.error(f"Choices not found in API response: {response_json}")
                    return ""
            except requests.exceptions.HTTPError as e:
                if e.response.status_code == 429:  # Too Many Requests
                    wait_time = backoff * (2 ** attempt)  # Exponential backoff
                    logger.warning(f"API rate limit exceeded, waiting {wait_time} seconds before retrying...")
                    time.sleep(wait_time)
                else:
                    logger.error(f"API call error: {str(e)}")
                    continue
            except Exception as e:
                logger.error(f"API call failed: {str(e)}")
                logger.error(traceback.format_exc())
                continue
        logger.error("API call retry limit exceeded")
        return ""
    except Exception as e:
        # CONSISTENCY FIX: use the module logger (was the root `logging` module)
        logger.error(f"Exception in call_gpt_api: {str(e)}")
        return f"ERROR: {str(e)}"
    finally:
        # BUG FIX: the original recorded the request time only on the exception
        # path; record it for every call so avg_request_time is meaningful.
        llm_metrics["request_times"].append(time.time() - request_start)
# Claude API Call
def create_anthropic_session():
    """Return a plain requests.Session for Anthropic API calls (no retry adapter)."""
    return requests.Session()
def call_anthropic_api(prompt, model=ANTHROPIC_DEFAULT_MODEL, max_tokens=ANTHROPIC_DEFAULT_MAX_TOKENS, temperature=ANTHROPIC_DEFAULT_TEMPERATURE):
    """
    Call the Anthropic API with the provided prompt.
    Parameters:
        prompt (str): The prompt to send to the API
        model (str): Model to use, defaults to ANTHROPIC_DEFAULT_MODEL
        max_tokens (int): Maximum tokens in response, defaults to ANTHROPIC_DEFAULT_MAX_TOKENS
        temperature (float): Temperature for response generation, defaults to ANTHROPIC_DEFAULT_TEMPERATURE
    Returns:
        str: The model's response, "" when retries are exhausted, or an
        "ERROR: ..." string for fatal failures (e.g. missing API key)
    """
    global llm_metrics
    # Initialize metrics if this is the first call
    if llm_metrics["start_time"] is None:
        reset_llm_metrics()
    # Track number of requests and the (estimated) prompt size
    llm_metrics["request_count"] += 1
    llm_metrics["token_sizes"].append(_estimate_token_size(prompt))
    # Track time for this request
    request_start = time.time()
    try:
        if not ANTHROPIC_API_KEY:
            raise ValueError("ANTHROPIC API key not set. Please set ANTHROPIC_API_KEY environment variable or pass it via command line.")
        session = create_anthropic_session()
        headers = {
            "x-api-key": ANTHROPIC_API_KEY,
            "anthropic-version": "2023-06-01",
            "Content-Type": "application/json"
        }
        data = {
            "model": model,
            "max_tokens": max_tokens,
            "temperature": temperature,
            "messages": [{"role": "user", "content": prompt}]
        }
        logger.info(f"Calling API: model={model}, max_tokens={max_tokens}")
        retries = 3
        backoff = 5  # Initial wait time in seconds
        for attempt in range(retries):
            try:
                response = session.post(
                    f"{ANTHROPIC_API_BASE}messages",
                    headers=headers,
                    json=data,
                    timeout=120
                )
                response.raise_for_status()
                response_json = response.json()
                if "content" in response_json and len(response_json["content"]) > 0:
                    return response_json["content"][0]["text"]
                else:
                    logger.error(f"Content not found in API response: {response_json}")
                    return ""
            except requests.exceptions.HTTPError as e:
                if e.response.status_code == 429:
                    wait_time = backoff * (2 ** attempt)
                    logger.warning(f"API rate limit exceeded, waiting {wait_time} seconds before retrying...")
                    time.sleep(wait_time)
                else:
                    # Non-rate-limit HTTP errors are not retried for this API
                    logger.error(f"API call error: {str(e)}")
                    break
            except Exception as e:
                logger.error(f"API call failed: {str(e)}")
                logger.error(traceback.format_exc())
                break
        logger.error("API call retry limit exceeded")
        return ""
    except Exception as e:
        # CONSISTENCY FIX: use the module logger (was the root `logging` module)
        logger.error(f"Exception in call_anthropic_api: {str(e)}")
        return f"ERROR: {str(e)}"
    finally:
        # BUG FIX: the original recorded the request time only on the exception
        # path; record it for every call so avg_request_time is meaningful.
        llm_metrics["request_times"].append(time.time() - request_start)
def call_deepseek_api(prompt, model=DEEPSEEK_DEFAULT_MODEL, max_tokens=DEEPSEEK_DEFAULT_MAX_TOKENS, temperature=DEEPSEEK_DEFAULT_TEMPERATURE):
    """
    Call the DeepSeek API with the provided prompt.
    Parameters:
        prompt (str): The prompt to send to the API
        model (str): Model to use, defaults to DEEPSEEK_DEFAULT_MODEL
        max_tokens (int): Maximum tokens in response, defaults to DEEPSEEK_DEFAULT_MAX_TOKENS
        temperature (float): Temperature for response generation, defaults to DEEPSEEK_DEFAULT_TEMPERATURE
    Returns:
        str: The model's response, "" when retries are exhausted, or an
        "ERROR: ..." string for fatal failures (e.g. missing API key)
    """
    global llm_metrics
    # Initialize metrics if this is the first call
    if llm_metrics["start_time"] is None:
        reset_llm_metrics()
    # Track number of requests and the (estimated) prompt size
    llm_metrics["request_count"] += 1
    llm_metrics["token_sizes"].append(_estimate_token_size(prompt))
    # Track time for this request
    request_start = time.time()
    try:
        if not DEEPSEEK_API_KEY:
            raise ValueError("DeepSeek API key not set. Please set DEEPSEEK_API_KEY variable or environment variable.")
        session = create_session()
        headers = {
            "Authorization": f"Bearer {DEEPSEEK_API_KEY}",
            "Content-Type": "application/json"
        }
        data = {
            "model": model,
            "messages": [{"role": "user", "content": prompt}],
            "max_tokens": max_tokens,
            "temperature": temperature
        }
        logger.info(f"Calling DeepSeek API: model={model}, max_tokens={max_tokens}")
        retries = 3
        backoff = 5  # Initial wait time in seconds
        for attempt in range(retries):
            try:
                response = session.post(
                    f"{DEEPSEEK_API_BASE}/chat/completions",
                    headers=headers,
                    json=data,
                    timeout=120
                )
                response.raise_for_status()
                response_json = response.json()
                if "choices" in response_json and len(response_json["choices"]) > 0:
                    return response_json["choices"][0]["message"]["content"]
                else:
                    logger.error(f"Choices not found in API response: {response_json}")
                    return ""
            except requests.exceptions.HTTPError as e:
                if e.response.status_code == 429:
                    wait_time = backoff * (2 ** attempt)
                    logger.warning(f"API rate limit exceeded, waiting {wait_time} seconds before retrying...")
                    time.sleep(wait_time)
                else:
                    # Non-rate-limit HTTP errors are not retried for this API
                    logger.error(f"API call error: {str(e)}")
                    break
            except Exception as e:
                logger.error(f"API call failed: {str(e)}")
                logger.error(traceback.format_exc())
                break
        logger.error("API call retry limit exceeded")
        return ""
    except Exception as e:
        # CONSISTENCY FIX: use the module logger (was the root `logging` module)
        logger.error(f"Exception in call_deepseek_api: {str(e)}")
        return f"ERROR: {str(e)}"
    finally:
        # BUG FIX: the original recorded the request time only on the exception
        # path; record it for every call so avg_request_time is meaningful.
        llm_metrics["request_times"].append(time.time() - request_start)
def clean_forbidden_comments(code):
    """
    Remove forbidden placeholder comments (e.g. "// ... existing code ...")
    from generated Java code.

    Parameters:
        code (str): Java source text produced by an LLM

    Returns:
        str: The code with placeholder comments stripped and the blank-line
        runs they leave behind collapsed to a single blank line.
    """
    # List of forbidden comment patterns
    forbidden_patterns = [
        r'//\s*\.\.\.\s*existing\s+code\s*\.\.\.',
        r'//\s*\[Previous\s+imports\s+remain\s+exactly\s+the\s+same\]',
        r'//\s*\[Previous\s+imports\s+remain\s+exactly\s+as\s+shown\s+in\s+the\s+original\s+code\]',
        r'//\s*\[Previous\s+test\s+methods\s+remain\s+exactly\s+as\s+shown\s+in\s+the\s+original\s+code\]',
        r'//\s*All\s+previous\s+fields\s+and\s+methods\s+remain\s+exactly\s+the\s+same',
        r'//\s*existing\s+code\s*\.\.\.',
        r'/\*\s*\.\.\.\s*existing\s+code\s*\.\.\.\s*\*/',
    ]
    cleaned_code = code
    for pattern in forbidden_patterns:
        cleaned_code = re.sub(pattern, '', cleaned_code, flags=re.IGNORECASE)
    # BUG FIX: the original filter kept every line (an empty string never
    # starts with '//', so the condition was always True) and therefore did
    # nothing. Collapse runs of consecutive blank lines — including those
    # produced by the substitutions above — down to a single blank line.
    cleaned_lines = []
    previous_blank = False
    for line in cleaned_code.split('\n'):
        blank = not line.strip()
        if blank and previous_blank:
            continue
        cleaned_lines.append(line)
        previous_blank = blank
    return '\n'.join(cleaned_lines)
def extract_java_code(text):
    """Extract Java code from LLM response text, preferring a complete class definition.

    Falls through several strategies: a fenced ```java block containing a full
    class; any ```java block(s); any fenced block; and finally brace-counting
    from a bare class declaration. Every return path runs the result through
    clean_forbidden_comments().
    """
    # First, try to match a single complete Java code block (an entire class/interface/enum)
    class_pattern = re.compile(r'```java\s*((?:public\s+)?(?:class|interface|enum)\s+\w+[\s\S]*?)\s*```', re.DOTALL)
    class_match = class_pattern.search(text)
    if class_match:
        extracted_code = class_match.group(1)
        # Make sure the code is complete — a full class definition, not just a fragment
        if "class " in extracted_code and "{" in extracted_code and extracted_code.strip().endswith("}"):
            # Clean up forbidden comments before returning
            extracted_code = clean_forbidden_comments(extracted_code)
            return extracted_code
    # If no complete class was found, collect all ```java blocks and consider joining them
    java_pattern = re.compile(r'```java\s*(.*?)\s*```', re.DOTALL)
    matches = java_pattern.findall(text)
    if matches:
        # Check whether any single block already contains a complete class definition
        for match in matches:
            if "class " in match and "{" in match and match.strip().endswith("}"):
                return clean_forbidden_comments(match)
        # No complete class, but there is at least one code block: use the only one
        if len(matches) == 1:
            return clean_forbidden_comments(matches[0])
        else:
            # Multiple blocks: try to merge them into one unit
            combined_code = "\n\n".join(matches)
            # Does the merged code look like a complete class?
            if "class " in combined_code and "{" in combined_code and combined_code.strip().endswith("}"):
                return clean_forbidden_comments(combined_code)
            else:
                # Merged code is incomplete: fall back to the longest single block
                return clean_forbidden_comments(max(matches, key=len))
    # Fall back to any fenced code block (no language tag)
    code_pattern = re.compile(r'```\s*(.*?)\s*```', re.DOTALL)
    matches = code_pattern.findall(text)
    if matches:
        # Prefer a block containing a complete class definition
        for match in matches:
            if "class " in match and "{" in match and match.strip().endswith("}"):
                return clean_forbidden_comments(match)
        # Otherwise return the longest code block
        return clean_forbidden_comments(max(matches, key=len))
    # Last attempt: if the raw text looks like Java, brace-count from the class declaration
    if "public class" in text or "import " in text:
        # Try to extract from the class declaration to its matching closing brace
        class_start = text.find("public class")
        if class_start == -1:
            class_start = text.find("class ")
        if class_start != -1:
            # Walk line by line, tracking brace depth until the class closes
            open_braces = 0
            in_class = False
            class_content = []
            for line in text[class_start:].split('\n'):
                class_content.append(line)
                if '{' in line:
                    in_class = True
                    open_braces += line.count('{')
                if '}' in line:
                    open_braces -= line.count('}')
                if in_class and open_braces == 0:
                    break
            if class_content:
                return clean_forbidden_comments('\n'.join(class_content))
        # Still could not extract — return the whole text; it may contain Java code
        return clean_forbidden_comments(text)
    # All strategies failed: return the original text (cleaned)
    return clean_forbidden_comments(text)
def generate_initial_test(test_prompt_file, source_code):
    """
    Generate initial unit test
    Parameters:
        test_prompt_file (str): Path to test prompt file
        source_code (str): Source code
    Returns:
        str: Generated test code ("" on any failure)
    """
    # NOTE(review): source_code is never referenced below — presumably the
    # prompt file already embeds the source under test; confirm against callers.
    try:
        with open(test_prompt_file, 'r', encoding='utf-8') as f:
            prompt_content = f.read()
    except Exception as e:
        logger.error(f"Failed to read prompt file: {str(e)}")
        return ""
    # Assemble the final prompt: the file content followed by hard requirements
    # (JUnit 5, no nesting, anti-mocking, anti-placeholder) appended verbatim.
    prompt = f"""
{prompt_content}
Please provide the complete test class code, including all necessary imports and annotations. Ensure that your tests are thorough, covering all aspects of the class behavior while considering the provided structure, data flow, and dependencies.
Important notes:
1. Remember to import all necessary classes as listed in the Imports section.
2. In your test class, explicitly verify that the class implements all listed interfaces and extends the superclass (if any).
3. When testing overridden methods, add comments indicating which interface or superclass they are inherited from.
4. DO NOT use @Nested annotations or nested test classes, as they cause coverage tracking issues.
5. Always provide a complete, well-structured test class that will compile without any modifications.
6. Use straightforward test methods without nesting to ensure proper coverage tracking.
STRICT ANTI-MOCKING REQUIREMENTS:
- ABSOLUTELY NO use of any mocking frameworks (Mockito, EasyMock, PowerMock, etc.)
- ABSOLUTELY NO @Mock, @MockBean, @InjectMocks, or any mock-related annotations
- ABSOLUTELY NO imports from org.mockito.* or static imports from Mockito
- ABSOLUTELY NO mock(), when(), verify(), or any mocking methods
- Use ONLY real objects and direct instantiation for testing
- Create real instances of dependencies instead of mocks
- Focus on testing actual behavior with real object interactions
Please generate a complete JUnit test class, ensuring coverage of all main functionality.
Use JUnit 5 (Jupiter) annotations and assertions. Please follow all testing requirements in the prompt.
CRITICAL ANTI-PLACEHOLDER REQUIREMENTS:
- YOUR RESPONSE MUST CONTAIN THE COMPLETE TEST CLASS CODE
- DO NOT OMIT ANY PARTS OF THE CODE OR USE PLACEHOLDERS
- FORBIDDEN: "// ... existing code ...", "// [Previous imports remain exactly the same]", "// All previous fields and methods remain exactly the same"
- REQUIRED: Every single import, field, and method must be written out in full
- NO shortcuts, abbreviations, or comments indicating omitted code are allowed
- Your response must be compilable Java code that can be directly saved to a file
"""
    logger.info(f"Generating initial test, prompt length: {len(prompt)}")
    # Alternative backends are kept commented out for quick switching.
    # response = call_gpt_api(prompt)
    response = call_anthropic_api(prompt)
    # response = call_deepseek_api(prompt)
    if not response:
        logger.error("API returned empty response")
        return ""
    # Extract Java code from the (possibly fenced/markdown) model response
    test_code = extract_java_code(response)
    logger.info(f"Extracted test code length: {len(test_code)}")
    return test_code
# Define Apache License text
# Prepended to every generated test file by save_test_code() unless the
# generated code already starts with a block comment.
APACHE_LICENSE = """/*
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
"""
def save_test_code(test_code, class_name, package_name, project_dir):
    """
    Write the generated test class into the project's test source tree.

    Parameters:
        test_code (str): Test code
        class_name (str): Name of the class being tested
        package_name (str): Package name
        project_dir (str): Project directory

    Returns:
        str: Path to the saved file, or "" on failure
    """
    # Guard against a missing payload
    if test_code is None:
        logger.error("Cannot save test code: test_code is None")
        return ""
    package_path = package_name.replace(".", os.sep)
    # Candidate test directory layouts, in order of preference
    candidate_dirs = [
        os.path.join(project_dir, "src", "test", "java", package_path),
        os.path.join(project_dir, "src", "test", "java", "test", package_path),
        os.path.join(project_dir, "test", "java", package_path),
    ]
    # Pick the first directory that already exists; otherwise fall back to the
    # first candidate and create it.
    target_dir = candidate_dirs[0]
    for candidate in candidate_dirs:
        if os.path.exists(candidate):
            target_dir = candidate
            break
    os.makedirs(target_dir, exist_ok=True)
    # Test class follows the <ClassName>Test naming convention
    file_path = os.path.join(target_dir, f"{class_name}Test.java")
    # Prepend the Apache license header unless the code already opens with a block comment
    if not test_code.strip().startswith("/*"):
        package_match = re.search(r'(package\s+[\w.]+;)', test_code)
        if not re.search(r'package\s+[\w.]+;', test_code):
            # No package declaration at all: add license + package line up front
            test_code = f"{APACHE_LICENSE}\npackage {package_name};\n\n{test_code}"
        elif package_match:
            # Insert the license immediately before the existing package declaration
            package_stmt = package_match.group(1)
            test_code = test_code.replace(package_stmt, f"{APACHE_LICENSE}\n{package_stmt}")
        else:
            # Defensive fallback: put the license at the very beginning
            test_code = f"{APACHE_LICENSE}\n{test_code}"
    # Save file
    try:
        with open(file_path, 'w', encoding='utf-8') as f:
            f.write(test_code)
        logger.info(f"Test code saved to: {file_path}")
        return file_path
    except Exception as e:
        logger.error(f"Failed to save test code: {str(e)}")
        return ""
def run_maven_command(command, project_dir='.'):
    """
    Run a Maven command and capture its output.

    Parameters:
        command (str): Maven command
        project_dir (str): Project directory

    Returns:
        tuple: (success, stdout, stderr)
    """
    # License checks are always skipped; generated tests get their own header.
    full_command = f"mvn {command} -Dlicense.skip=true"
    try:
        completed = subprocess.run(
            full_command,
            shell=True,
            cwd=project_dir,
            capture_output=True,
            text=True,
        )
        return completed.returncode == 0, completed.stdout, completed.stderr
    except Exception as e:
        logger.error(f"Failed to run Maven command: {str(e)}")
        return False, "", str(e)
def run_gradle_command(command, project_dir='.'):
    """
    Run a Gradle command and capture its output.

    Parameters:
        command (str): Gradle command
        project_dir (str): Project directory

    Returns:
        tuple: (success, stdout, stderr)
    """
    # Prefer the project's wrapper script when present; fall back to a global gradle.
    launcher = './gradlew' if os.path.exists(os.path.join(project_dir, 'gradlew')) else 'gradle'
    full_command = f"{launcher} {command}"
    try:
        completed = subprocess.run(
            full_command,
            shell=True,
            cwd=project_dir,
            capture_output=True,
            text=True,
        )
        return completed.returncode == 0, completed.stdout, completed.stderr
    except Exception as e:
        logger.error(f"Failed to run Gradle command: {str(e)}")
        return False, "", str(e)
def run_build_command(command, project_dir='.', project_type='maven'):
    """
    Dispatch a build command to the appropriate build-tool runner.

    Parameters:
        command (str): Build command
        project_dir (str): Project directory
        project_type (str): Project type ('maven' or 'gradle', case-insensitive);
            anything other than 'gradle' falls back to Maven.

    Returns:
        tuple: (success, stdout, stderr)
    """
    runner = run_gradle_command if project_type.lower() == 'gradle' else run_maven_command
    return runner(command, project_dir)
def remove_ansi_escape_sequences(text):
    """Strip ANSI terminal escape sequences (colors, cursor moves) from *text*."""
    # ESC followed by either a single command byte or a CSI sequence.
    pattern = re.compile(r'\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])')
    return pattern.sub('', text)
def parse_maven_errors(output):
    """
    Parse error information from Maven output with improved test failure detection.
    Extract each individual test failure as a separate error.
    Parameters:
        output (str): Maven command output
    Returns:
        tuple: (compilation_errors, assertion_failures) - Lists of different error types
    """
    if not output:
        return [], []
    # Clean ANSI codes so the regexes below see plain text
    output = remove_ansi_escape_sequences(output)
    compilation_errors = []
    assertion_failures = []
    # Check for OutOfMemory errors first (highest priority)
    memory_errors = re.findall(r'(java\.lang\.OutOfMemoryError:.*?)(?:\n|\r\n|\r)', output)
    if memory_errors:
        compilation_errors.append(f"Critical memory error: {memory_errors[0].strip()}")
    # Check for VM limit errors (another form of OOM)
    vm_limit_errors = re.findall(r'(Requested array size exceeds VM limit.*?)(?:\n|\r\n|\r)', output)
    if vm_limit_errors:
        compilation_errors.append(f"Critical memory error: {vm_limit_errors[0].strip()}")
    # Note: Mockito usage is prohibited - any mockito-related errors indicate code needs refactoring to use real objects
    # Extract individual test failures - this is the key improvement
    # Look for lines like: [ERROR] SoundexTest.testEmptyInput:79 expected: <0000> but was: <>
    test_failure_pattern = r'\[ERROR\]\s+([A-Za-z0-9_.]+\.[A-Za-z0-9_]+)(?::(\d+))?\s+(.*)'
    test_failures = re.findall(test_failure_pattern, output)
    for test_class_method, line_number, error_msg in test_failures:
        # Create a formatted error message (the line number group is optional)
        if line_number:
            formatted_error = f"{test_class_method}:{line_number} {error_msg.strip()}"
        else:
            formatted_error = f"{test_class_method} {error_msg.strip()}"
        assertion_failures.append(formatted_error)
    # If no specific test failures found, look for general test failure blocks
    if not assertion_failures:
        test_failure_blocks = re.findall(r'\[ERROR\] Failures:\s*(.*?)(?=\[INFO\]|\[ERROR\]|$)', output, re.DOTALL)
        if test_failure_blocks:
            for block in test_failure_blocks:
                assertion_failures.append(f"Test failures: {block.strip()}")
    # Check for general compilation errors (file.java:line: ..., excluding the Failures: block)
    compile_errors = re.findall(r'\[ERROR\] (?!Failures:)(.*?\.java:\d+:.*?)(?=\[|\n\[|\Z)', output, re.DOTALL)
    for error in compile_errors:
        compilation_errors.append(error.strip())
    # Check for build failure
    # NOTE(review): this appends the *entire* Maven output as a single error
    # entry when nothing more specific was matched — confirm this is intended.
    if "BUILD FAILURE" in output and not (compilation_errors or assertion_failures):
        compilation_errors.append(output)
    return compilation_errors, assertion_failures
def parse_gradle_errors(output):
    """
    Parse error information from Gradle output with improved test failure detection.
    Extract each individual test failure as a separate error.
    Parameters:
        output (str): Gradle command output
    Returns:
        tuple: (compilation_errors, assertion_failures) - Lists of different error types
    """
    if not output:
        return [], []
    # Clean ANSI codes so the regexes below see plain text
    output = remove_ansi_escape_sequences(output)
    compilation_errors = []
    assertion_failures = []
    # Check for OutOfMemory errors first (highest priority)
    memory_errors = re.findall(r'(java\.lang\.OutOfMemoryError:.*?)(?:\n|\r\n|\r)', output)
    if memory_errors:
        compilation_errors.append(f"Critical memory error: {memory_errors[0].strip()}")
    # Check for VM limit errors (another form of OOM)
    vm_limit_errors = re.findall(r'(Requested array size exceeds VM limit.*?)(?:\n|\r\n|\r)', output)
    if vm_limit_errors:
        compilation_errors.append(f"Critical memory error: {vm_limit_errors[0].strip()}")
    # Note: Mockito usage is prohibited - any mockito-related errors indicate code needs refactoring to use real objects
    # Check for compilation errors first - these take priority
    # Pattern 1: Standard Java compilation errors with file:line:column format
    compile_errors_detailed = re.findall(r'([^:]+\.java):(\d+):(\d+):\s*(error|warning):\s*(.*?)(?=\n|\r\n|\r|$)', output)
    for file_path, line, column, error_type, error_msg in compile_errors_detailed:
        # Extract just the filename from the full path
        # NOTE(review): `filename` is computed but unused — the message below
        # hard-codes "(unknown)"; confirm whether the filename should be used.
        filename = os.path.basename(file_path)
        if error_type == "error":  # Only treat actual errors as compilation errors
            compilation_errors.append(f"(unknown):{line}:{column}: {error_type}: {error_msg.strip()}")
    # Pattern 2: General compilation error patterns (includes "cannot find symbol", "constructor not found", etc.)
    # Look for common compilation error indicators
    compilation_indicators = [
        r'cannot find symbol',
        r'constructor .* in class .* cannot be applied to given types',
        r'cannot be applied to given types',
        r'method .* cannot be applied to given types',
        r'incompatible types',
        r'package .* does not exist',
        r'class .* is public, should be declared in a file named',
        r'duplicate class',
        r'variable .* might not have been initialized',
        r'unreachable statement',
        r'missing return statement',
        r'illegal start of expression',
        r'illegal start of type',
        r'expected'
    ]
    # Scan each line for compilation error indicators
    lines = output.split('\n')
    for i, line in enumerate(lines):
        line_stripped = line.strip()
        # Skip obvious non-error lines
        if not line_stripped or line_stripped.startswith('*') or 'help.gradle.org' in line_stripped:
            continue
        for indicator in compilation_indicators:
            if re.search(indicator, line_stripped, re.IGNORECASE):
                # If this looks like a compilation error, capture it
                # Try to get the full context (may span multiple lines)
                error_context = line_stripped
                # Look for a file:line pattern in nearby lines (2 before to 2 after)
                for j in range(max(0, i-2), min(len(lines), i+3)):
                    if re.search(r'\.java:\d+:', lines[j]):
                        file_line_context = lines[j].strip()
                        if file_line_context != error_context:
                            error_context = file_line_context + " " + error_context
                        break
                # Only add if it's not already present and contains actual error info
                if (error_context and
                    error_context not in compilation_errors and
                    len(error_context) > 10 and  # Must be substantial
                    any(keyword in error_context.lower() for keyword in ['error:', 'cannot', 'symbol', 'constructor'])):
                    compilation_errors.append(error_context)
                break
    # Pattern 3: Look for "error:" keyword specifically (common in Java compilation output)
    general_compile_errors = re.findall(r'(.*?\.java:\d+:.*?error:.*?)(?=\n|\r\n|\r|$)', output, re.IGNORECASE)
    for error in general_compile_errors:
        if error.strip() and error.strip() not in compilation_errors:
            compilation_errors.append(error.strip())
    # Pattern 4: Gradle-specific compilation failure patterns
    # Look for "Compilation failed" or similar messages
    if re.search(r'compilation failed|COMPILATION ERROR|BUILD FAILED.*compilation', output, re.IGNORECASE):
        # If we don't have specific errors but know compilation failed
        if not compilation_errors:
            # Try to extract more specific error information
            gradle_error_block = re.search(r'(.*(?:compilation failed|COMPILATION ERROR).*?)(?=\n\w|\Z)', output, re.DOTALL | re.IGNORECASE)
            if gradle_error_block:
                compilation_errors.append(gradle_error_block.group(1).strip())
    # Now handle test failures (only if they're not compilation errors)
    # Extract individual test failures from Gradle output
    # Gradle test failure format: org.example.TestClass > testMethod FAILED
    test_failure_pattern = r'([A-Za-z0-9_.]+)\s*>\s*([A-Za-z0-9_]+)\s+FAILED'
    test_failures = re.findall(test_failure_pattern, output)
    for test_class, test_method in test_failures:
        # Look for the specific error message for this test
        # Gradle usually shows the error after the FAILED line
        error_pattern = rf'{re.escape(test_class)}\s*>\s*{re.escape(test_method)}\s+FAILED\s*\n\s*(.*?)(?=\n\s*[A-Za-z]|\n\s*$|\Z)'
        error_match = re.search(error_pattern, output, re.DOTALL)
        if error_match:
            error_msg = error_match.group(1).strip()
            # Check if this is actually a compilation error disguised as test failure
            if any(re.search(indicator, error_msg, re.IGNORECASE) for indicator in compilation_indicators):
                compilation_errors.append(f"{test_class}.{test_method} - {error_msg}")
            else:
                formatted_error = f"{test_class}.{test_method} {error_msg}"
                assertion_failures.append(formatted_error)
        else:
            # No detail found: record the bare FAILED marker
            formatted_error = f"{test_class}.{test_method} FAILED"
            assertion_failures.append(formatted_error)
    # Also look for JUnit-style test failures in Gradle output (but check for compilation errors first)
    junit_failure_pattern = r'([A-Za-z0-9_.]+)\.([A-Za-z0-9_]+)(?::(\d+))?\s+(.*?)(?=\n\s*[A-Za-z]|\n\s*$|\Z)'
    junit_failures = re.findall(junit_failure_pattern, output)
    for test_class, test_method, line_number, error_msg in junit_failures:
        error_msg_stripped = error_msg.strip()
        # Skip obvious non-test failure patterns
        if (not error_msg_stripped or
            'help.gradle.org' in error_msg_stripped or
            'uses or overrides a deprecated API' in error_msg_stripped or
            len(error_msg_stripped) < 5):
            continue
        # Check if this is actually a compilation error
        if any(re.search(indicator, error_msg_stripped, re.IGNORECASE) for indicator in compilation_indicators):
            if line_number:
                compilation_errors.append(f"{test_class}.{test_method}:{line_number} {error_msg_stripped}")
            else:
                compilation_errors.append(f"{test_class}.{test_method} {error_msg_stripped}")
        else:
            # Only treat as test failure if it looks like a real test failure
            if ('expected' in error_msg_stripped.lower() or
                'assertion' in error_msg_stripped.lower() or
                'failed' in error_msg_stripped.lower()):
                if line_number:
                    formatted_error = f"{test_class}.{test_method}:{line_number} {error_msg_stripped}"
                else:
                    formatted_error = f"{test_class}.{test_method} {error_msg_stripped}"
                # Avoid duplicates
                if formatted_error not in assertion_failures:
                    assertion_failures.append(formatted_error)
    # Check for build failure
    if "BUILD FAILED" in output and not (compilation_errors or assertion_failures):
        compilation_errors.append("BUILD FAILED - check build output for details")
    return compilation_errors, assertion_failures
def find_jacoco_report(project_dir, project_type='maven'):
"""
Find Jacoco-generated XML report file
Parameters:
project_dir (str): Project directory
project_type (str): Project type ('maven' or 'gradle')
Returns:
str: Report file path
"""
# Common Jacoco report paths based on project type
if project_type.lower() == 'gradle':
patterns = [
os.path.join(project_dir, 'build', 'reports', 'jacoco', 'test', 'jacocoTestReport.xml'),
os.path.join(project_dir, 'build', 'reports', 'jacoco', 'jacocoTestReport.xml'),
os.path.join(project_dir, 'build', 'jacoco', 'test.xml'),
os.path.join(project_dir, 'build', 'jacoco', 'jacoco.xml'),
# Multi-module Gradle projects
os.path.join(project_dir, 'build', 'reports', 'jacoco', 'jacocoRootReport', 'jacocoRootReport.xml'),
]
else:
patterns = [
os.path.join(project_dir, 'target', 'site', 'jacoco', 'jacoco.xml'),
os.path.join(project_dir, 'target', 'site', 'jacoco-ut', 'jacoco.xml'),
os.path.join(project_dir, 'target', 'site', 'jacoco-aggregate', 'jacoco.xml'),