字符流实战:文本解析
学完了 Reader/Writer、FileReader/FileWriter、BufferedReader/BufferedWriter,是时候综合运用它们了。
本节实现几种常见的文本处理场景:CSV 读写、properties 配置、固定宽度格式、文本统计和按行去重。
读取 CSV 文件
java
/**
 * Reads a simple comma-separated file into a list of rows.
 *
 * <p>The file is decoded as UTF-8. Lines that are empty or contain only
 * whitespace are skipped. Every remaining line is split on each comma; quoting
 * is not interpreted, so a comma inside double quotes still separates fields.
 *
 * @param path path of the CSV file to read
 * @return one {@code String[]} per non-blank line, in file order
 * @throws IOException if the file cannot be opened or read
 */
public static List<String[]> readCsv(String path) throws IOException {
    List<String[]> rows = new ArrayList<>();
    try (BufferedReader reader = new BufferedReader(
            new InputStreamReader(
                    new FileInputStream(path), StandardCharsets.UTF_8))) {
        String line;
        while ((line = reader.readLine()) != null) {
            if (line.trim().isEmpty()) {
                continue; // skip blank lines
            }
            // A negative limit keeps trailing empty fields. With the default
            // limit, "a,b,," would collapse to two columns and rows of the
            // same file could end up with different lengths.
            rows.add(line.split(",", -1));
        }
    }
    return rows;
}
/**
 * Reads a CSV file in which fields may be wrapped in double quotes, so that
 * commas inside a quoted section do not split the field.
 *
 * <p>The file is decoded as UTF-8 and blank lines are skipped. Each line is
 * scanned character by character: every double quote toggles the "inside
 * quotes" state and is itself discarded, a comma outside quotes ends the
 * current field, and any other character is appended to the current field.
 *
 * <p>NOTE(review): because every quote character only toggles state and is
 * dropped, a doubled quote ("") inside a quoted field produces no quote
 * character in the result. writeCsvWithQuotes in this file escapes embedded
 * quotes by doubling them, so such fields do not round-trip.
 *
 * <p>NOTE(review): the quote state is reset for every line, so a quoted field
 * that contains a line break is parsed as separate rows.
 *
 * @param path path of the CSV file to read
 * @return one {@code String[]} per non-blank line, in file order
 * @throws IOException if the file cannot be opened or read
 */
public static List<String[]> readCsvWithQuotes(String path)
throws IOException {
List<String[]> rows = new ArrayList<>();
try (BufferedReader reader = new BufferedReader(
new InputStreamReader(
new FileInputStream(path), StandardCharsets.UTF_8))) {
String line;
while ((line = reader.readLine()) != null) {
if (line.trim().isEmpty()) continue;
List<String> fields = new ArrayList<>();
StringBuilder field = new StringBuilder();
// Tracks whether the scan position is currently between a pair of quotes.
boolean inQuotes = false;
for (int i = 0; i < line.length(); i++) {
char c = line.charAt(i);
if (c == '"') {
// Quote characters switch the state and are not copied to the field.
inQuotes = !inQuotes;
} else if (c == ',' && !inQuotes) {
// An unquoted comma terminates the current field.
fields.add(field.toString());
field = new StringBuilder();
} else {
field.append(c);
}
}
fields.add(field.toString()); // the last field has no trailing comma
rows.add(fields.toArray(new String[0]));
}
}
return rows;
}写入 CSV 文件
java
/**
 * Writes rows to a file as plain comma-separated text, encoded as UTF-8.
 *
 * <p>Each row becomes one line: its fields are joined with a comma and the
 * line is terminated with the platform line separator. Field values are
 * written verbatim; nothing is quoted or escaped.
 *
 * @param path path of the file to write
 * @param rows rows to write, one array of fields per output line
 * @throws IOException if the file cannot be opened or written
 */
public static void writeCsv(String path, List<String[]> rows)
        throws IOException {
    try (FileOutputStream fileOut = new FileOutputStream(path);
         OutputStreamWriter encoder =
                 new OutputStreamWriter(fileOut, StandardCharsets.UTF_8);
         BufferedWriter out = new BufferedWriter(encoder)) {
        for (String[] cells : rows) {
            String record = String.join(",", cells);
            out.write(record);
            out.newLine();
        }
    }
}
/**
 * Writes rows to a UTF-8 CSV file, quoting fields that would otherwise be
 * ambiguous.
 *
 * <p>A field that contains a comma, a double quote or a line feed is wrapped
 * in double quotes, and each double quote inside it is doubled. All other
 * fields are written unchanged. Fields of a row are separated by commas and
 * each row ends with the platform line separator.
 *
 * <p>NOTE(review): a field containing only a carriage return ("\r") is not
 * quoted by the check below. Fields are dereferenced without a null check.
 *
 * @param path path of the file to write
 * @param rows rows to write, one array of fields per output line
 * @throws IOException if the file cannot be opened or written
 */
public static void writeCsvWithQuotes(String path, List<String[]> rows)
throws IOException {
try (BufferedWriter writer = new BufferedWriter(
new OutputStreamWriter(
new FileOutputStream(path), StandardCharsets.UTF_8))) {
for (String[] row : rows) {
StringBuilder line = new StringBuilder();
for (int i = 0; i < row.length; i++) {
String field = row[i];
if (field.contains(",") || field.contains("\"") || field.contains("\n")) {
// Wrap the field in quotes and escape embedded quotes by doubling them.
field = "\"" + field.replace("\"", "\"\"") + "\"";
}
line.append(field);
// Separator goes between fields only, not after the last one.
if (i < row.length - 1) line.append(",");
}
writer.write(line.toString());
writer.newLine();
}
}
}读取 properties 配置文件
java
/**
 * Parses a simple {@code key=value} configuration file decoded as UTF-8.
 *
 * <p>Each line is trimmed first. Empty lines and lines starting with
 * {@code #} or {@code !} are ignored. For the remaining lines the text before
 * the first {@code =} is the key and the text after it is the value, both
 * trimmed. Lines without {@code =}, or with {@code =} as the first character,
 * are ignored. A key that appears again replaces the earlier value.
 *
 * @param path path of the properties file to read
 * @return map of keys to values
 * @throws IOException if the file cannot be opened or read
 */
public static Map<String, String> readProperties(String path)
        throws IOException {
    Map<String, String> result = new HashMap<>();
    try (FileInputStream in = new FileInputStream(path);
         InputStreamReader decoder =
                 new InputStreamReader(in, StandardCharsets.UTF_8);
         BufferedReader source = new BufferedReader(decoder)) {
        for (String raw = source.readLine(); raw != null; raw = source.readLine()) {
            String entry = raw.trim();
            boolean blank = entry.isEmpty();
            boolean comment = entry.startsWith("#") || entry.startsWith("!");
            if (blank || comment) {
                continue;
            }
            int separator = entry.indexOf('=');
            if (separator <= 0) {
                // No '=' at all, or nothing in front of it: not a usable entry.
                continue;
            }
            String name = entry.substring(0, separator).trim();
            String setting = entry.substring(separator + 1).trim();
            result.put(name, setting);
        }
    }
    return result;
}
/**
 * Writes a map as a simple {@code key=value} properties file encoded as UTF-8.
 *
 * <p>The output starts with the comment line "# Properties file" followed by
 * an empty line, then one {@code key=value} line per map entry in the map's
 * iteration order. Keys and values are written verbatim, with no escaping.
 *
 * @param path  path of the file to write
 * @param props entries to write
 * @throws IOException if the file cannot be opened or written
 */
public static void writeProperties(String path, Map<String, String> props)
throws IOException {
try (BufferedWriter writer = new BufferedWriter(
new OutputStreamWriter(
new FileOutputStream(path), StandardCharsets.UTF_8))) {
// Header comment, then a blank line separating it from the entries.
writer.write("# Properties file");
writer.newLine();
writer.newLine();
for (Map.Entry<String, String> entry : props.entrySet()) {
writer.write(entry.getKey());
writer.write("=");
writer.write(entry.getValue());
writer.newLine();
}
}
}读取固定宽度格式文件
java
/**
 * Reads a fixed-width text file: every line holds consecutive fields of fixed
 * character widths with no delimiter between them, for example
 * name (8 characters) + age (4 characters) + city (10 characters).
 *
 * <p>The file is decoded as UTF-8 and blank lines are skipped. Fields are cut
 * from left to right using {@code widths}; each field is trimmed. When a line
 * is shorter than the total width, the cut is clamped to the end of the line,
 * so the remaining fields come back as empty strings.
 *
 * @param path   path of the file to read
 * @param widths width in characters of each field, in order
 * @return one {@code String[]} of {@code widths.length} fields per non-blank line
 * @throws IOException if the file cannot be opened or read
 */
public static List<String[]> readFixedWidth(String path, int... widths)
        throws IOException {
    List<String[]> records = new ArrayList<>();
    try (FileInputStream in = new FileInputStream(path);
         InputStreamReader decoder =
                 new InputStreamReader(in, StandardCharsets.UTF_8);
         BufferedReader source = new BufferedReader(decoder)) {
        for (String text = source.readLine(); text != null; text = source.readLine()) {
            if (text.trim().isEmpty()) {
                continue;
            }
            int columns = widths.length;
            String[] cells = new String[columns];
            int start = 0;
            int column = 0;
            while (column < columns) {
                int stop = start + widths[column];
                if (stop > text.length()) {
                    // Line is shorter than the layout: clamp to what is there.
                    stop = text.length();
                }
                cells[column] = text.substring(start, stop).trim();
                start = stop;
                column++;
            }
            records.add(cells);
        }
    }
    return records;
}
// 使用
List<String[]> data = readFixedWidth("fixed.txt", 8, 4, 10);
统计文本文件行数、字数、字符数
java
/**
 * Plain holder for the counters that analyzeText fills in for one file.
 * All fields are public and start at zero.
 */
public static class TextStats {
/** Number of lines returned by readLine(). */
public long lines;
/** Number of whitespace-separated tokens, summed over all lines. */
public long words;
/** Sum of String.length() over all lines; line terminators are not included. */
public long chars;
/** Size of the file in bytes as reported by Files.size. */
public long bytes;
}
/**
 * Computes line, word, character and byte counts for a text file decoded as
 * UTF-8.
 *
 * <p>Lines are counted one per readLine() result. Words are the
 * whitespace-separated tokens of each trimmed line. Characters are the sum of
 * String.length() of each line, so line terminators are excluded and the
 * count is in UTF-16 code units. The byte count is the file size on disk.
 *
 * @param path path of the file to analyze
 * @return a TextStats populated with the four counters
 * @throws IOException if the file cannot be opened, read, or sized
 */
public static TextStats analyzeText(String path) throws IOException {
TextStats stats = new TextStats();
try (BufferedReader reader = new BufferedReader(
new InputStreamReader(
new FileInputStream(path), StandardCharsets.UTF_8))) {
String line;
while ((line = reader.readLine()) != null) {
stats.lines++;
// A blank line contributes no words; otherwise count whitespace-separated tokens.
stats.words += line.trim().isEmpty() ? 0 :
line.trim().split("\\s+").length;
// readLine() strips the terminator, so line breaks are not counted here.
stats.chars += line.length();
}
}
// Byte size comes from the file system, not from the decoded text.
stats.bytes = Files.size(Path.of(path));
return stats;
}文本文件去重
java
/**
 * Copies a UTF-8 text file to another file, keeping only the first occurrence
 * of each distinct line.
 *
 * <p>All lines of {@code src} are read into a LinkedHashSet first, then the
 * set is written to {@code dst} in first-seen order, each line followed by
 * the platform line separator. Lines are compared exactly, with no trimming
 * or case folding. Every distinct line is held in memory until the output
 * has been written.
 *
 * @param src path of the file to read
 * @param dst path of the file to write
 * @throws IOException if either file cannot be opened, read, or written
 */
public static void removeDuplicateLines(String src, String dst)
throws IOException {
Set<String> seen = new LinkedHashSet<>(); // keeps first-seen insertion order
try (BufferedReader reader = new BufferedReader(
new InputStreamReader(
new FileInputStream(src), StandardCharsets.UTF_8))) {
String line;
while ((line = reader.readLine()) != null) {
// Adding an already-present line leaves the set unchanged.
seen.add(line);
}
}
// The reader is closed before the output file is opened.
try (BufferedWriter writer = new BufferedWriter(
new OutputStreamWriter(
new FileOutputStream(dst), StandardCharsets.UTF_8))) {
for (String line : seen) {
writer.write(line);
writer.newLine();
}
}
}记住这些模式:
| 场景 | 核心模式 |
|---|---|
| CSV | readLine() → split(",") → 处理引号 |
| Properties | 跳过空行及 # / ! 注释行 → indexOf('=') 拆分键值 |
| 固定宽度 | substring(pos, pos + width) |
| 去重 | LinkedHashSet 保持顺序 |
文本解析的核心工具:
readLine()+split()+StringBuilder
