forked from reposense/RepoSense
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathCsvParser.java
More file actions
176 lines (154 loc) · 6.6 KB
/
CsvParser.java
File metadata and controls
176 lines (154 loc) · 6.6 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
package reposense.parser;
import java.io.FileReader;
import java.io.IOException;
import java.io.Reader;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.stream.Collectors;
import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVRecord;
import reposense.system.LogsManager;
/**
 * Contains CSV parsing related functionalities.
 *
 * <p>Subclasses declare which columns are mandatory through {@link #mandatoryPositions()} and turn every
 * well-formed record into result objects through {@link #processLine(List, CSVRecord)}. The first record of
 * the CSV file is treated as the header.
 *
 * @param <T> type of the objects created from the rows of the CSV file.
 */
public abstract class CsvParser<T> {
    protected static final String COLUMN_VALUES_SEPARATOR = ";";
    protected static final Logger logger = LogsManager.getLogger(CsvParser.class);

    private static final String OVERRIDE_KEYWORD = "override:";
    private static final String MESSAGE_UNABLE_TO_READ_CSV_FILE = "Unable to read the supplied CSV file.";
    private static final String MESSAGE_MALFORMED_LINE_FORMAT = "Line %d in CSV file, %s, is malformed.\n"
            + "Content: %s";
    private static final String MESSAGE_LINE_PARSE_EXCEPTION_FORMAT = "Error parsing line %d in CSV file, %s.\n"
            + "Content: %s\n"
            + "Error: %s";

    private final Path csvFilePath;

    /**
     * Creates a parser for the CSV file located at {@code csvFilePath}.
     *
     * @param csvFilePath path to an existing CSV file.
     * @throws IOException if {@code csvFilePath} is {@code null} or does not exist.
     */
    public CsvParser(Path csvFilePath) throws IOException {
        if (csvFilePath == null || !Files.exists(csvFilePath)) {
            throw new IOException("Csv file does not exists in given path.\n"
                    + "Use '-help' to list all the available subcommands and some concept guides.");
        }

        this.csvFilePath = csvFilePath;
    }

    /**
     * Reads the CSV file and returns the objects created by {@link #processLine(List, CSVRecord)}.
     * Malformed records, and records for which {@code processLine} throws a {@code ParseException} or an
     * {@code IllegalArgumentException}, are logged as warnings and skipped; the remaining records are
     * still processed.
     *
     * @return the objects collected from all records that were processed successfully.
     * @throws IOException if there are error accessing the given csv file.
     */
    public List<T> parse() throws IOException {
        List<T> results = new ArrayList<>();

        // NOTE(review): FileReader decodes with the JVM's default charset; confirm this is intended for
        // CSV files that contain non-ASCII characters.
        try (Reader csvReader = new FileReader(csvFilePath.toFile())) {
            Iterable<CSVRecord> records = CSVFormat.DEFAULT.withFirstRecordAsHeader().parse(csvReader);

            for (CSVRecord record : records) {
                if (isLineMalformed(record)) {
                    continue;
                }

                try {
                    processLine(results, record);
                } catch (ParseException pe) {
                    logger.warning(String.format(MESSAGE_LINE_PARSE_EXCEPTION_FORMAT, getLineNumber(record),
                            csvFilePath.getFileName(), getRowContentAsRawString(record), pe.getMessage()));
                } catch (IllegalArgumentException iae) {
                    logger.log(Level.WARNING, iae.getMessage(), iae);
                }
            }
        } catch (IOException ioe) {
            throw new IOException(MESSAGE_UNABLE_TO_READ_CSV_FILE, ioe);
        }

        return results;
    }

    /**
     * Returns true if {@code record} does not contain the same number of columns as the header or contains missing
     * values at the mandatory columns in CSV format. A warning is logged for every malformed record.
     */
    private boolean isLineMalformed(CSVRecord record) {
        // The consistency check must come first: mandatory positions are only read from consistent records.
        boolean isMalformed = !record.isConsistent() || isMandatoryValueMissing(record);

        if (isMalformed) {
            logger.warning(String.format(MESSAGE_MALFORMED_LINE_FORMAT, getLineNumber(record),
                    csvFilePath.getFileName(), getRowContentAsRawString(record)));
        }
        return isMalformed;
    }

    /**
     * Returns true if any of the {@link #mandatoryPositions()} in {@code record} holds no value.
     * Values are read through {@link #get(CSVRecord, int)}, so a whitespace-only value counts as missing,
     * exactly as it would appear to {@link #processLine(List, CSVRecord)}.
     */
    private boolean isMandatoryValueMissing(CSVRecord record) {
        for (int position : mandatoryPositions()) {
            if (get(record, position).isEmpty()) {
                return true;
            }
        }
        return false;
    }

    /**
     * Returns the value of {@code record} at {@code colNum}, with leading and trailing whitespace removed.
     */
    protected String get(final CSVRecord record, int colNum) {
        return record.get(colNum).trim();
    }

    /**
     * Returns the value of {@code record} at {@code colNum} if present, or
     * returns {@code defaultValue} otherwise.
     */
    protected String getOrDefault(final CSVRecord record, int colNum, String defaultValue) {
        String value = get(record, colNum);
        return value.isEmpty() ? defaultValue : value;
    }

    /**
     * Returns the value of {@code record} at {@code colNum} as a {@code List},
     * delimited by {@code COLUMN_VALUES_SEPARATOR} if it is in {@code record} and not empty, or
     * returns an empty {@code List} otherwise. Every element is trimmed.
     * Callers should not rely on the returned list being modifiable.
     */
    protected List<String> getAsList(final CSVRecord record, int colNum) {
        String value = get(record, colNum);
        if (value.isEmpty()) {
            return Collections.emptyList();
        }

        return Arrays.stream(value.split(COLUMN_VALUES_SEPARATOR))
                .map(String::trim)
                .collect(Collectors.toList());
    }

    /**
     * Returns the values in {@code record} as a list with the {@link CsvParser#OVERRIDE_KEYWORD} prefix removed.
     * Returns an empty list if {@code record} at {@code colNum} is empty.
     * When the prefix is present, the first value is trimmed again after the prefix is removed and empty
     * values are dropped from the list.
     */
    protected List<String> getAsListWithoutOverridePrefix(final CSVRecord record, int colNum) {
        // Work on a copy: the list from getAsList may be immutable (e.g. Collections.emptyList()).
        List<String> data = new ArrayList<>(getAsList(record, colNum));

        if (isElementOverridingStandaloneConfig(record, colNum)) {
            // Trim so that whitespace between the keyword and the first value, e.g. "override: java",
            // does not leak into the result.
            String firstValue = data.get(0).replaceFirst(OVERRIDE_KEYWORD, "").trim();
            data.set(0, firstValue);
            data.removeIf(String::isEmpty);
        }

        return data;
    }

    /**
     * Returns the line number reported for {@code record} in log messages: its record number plus one,
     * to account for the header row. This presumably assumes that every record occupies a single line.
     */
    private long getLineNumber(final CSVRecord record) {
        return record.getRecordNumber() + 1;
    }

    /**
     * Returns true if the {@code record} at {@code colNum} is prefixed with the override keyword.
     */
    protected boolean isElementOverridingStandaloneConfig(final CSVRecord record, int colNum) {
        return get(record, colNum).startsWith(OVERRIDE_KEYWORD);
    }

    /**
     * Returns the contents of {@code record} as a raw string: every trimmed column value followed by a comma.
     */
    private String getRowContentAsRawString(final CSVRecord record) {
        StringBuilder inputRowString = new StringBuilder();
        for (int colNum = 0; colNum < record.size(); colNum++) {
            inputRowString.append(get(record, colNum)).append(",");
        }
        return inputRowString.toString();
    }

    /**
     * Gets the list of positions that are mandatory for verification.
     */
    protected abstract int[] mandatoryPositions();

    /**
     * Processes the csv file line by line.
     * All CsvParsers must use {@link CsvParser#get}, {@link CsvParser#getOrDefault},
     * {@link CsvParser#getAsList} or {@link CsvParser#getAsListWithoutOverridePrefix} to read contents in
     * {@code record} and add created objects into {@code results}.
     *
     * @param results list that the objects created from {@code record} are added to.
     * @param record a record that has passed the malformed-line check.
     * @throws ParseException if the contents of {@code record} cannot be converted.
     */
    protected abstract void processLine(List<T> results, final CSVRecord record) throws ParseException;
}