Skip to content

字符流实战:文本解析

学完了 Reader/Writer、FileReader/FileWriter、BufferedReader/BufferedWriter,是时候综合运用它们了。

本节实现几种常见的文本处理场景:CSV 读写、properties 配置文件、固定宽度格式、文本统计和去重。

读取 CSV 文件

java
/**
 * Reads a simple comma-separated file encoded as UTF-8.
 *
 * <p>Each non-blank line becomes one row. Fields are split on every comma;
 * quoting is NOT interpreted, so a comma inside quotes still separates fields.
 * Empty fields are preserved, including trailing ones, so every row keeps its
 * full column count (for example {@code "a,b,,"} yields four fields).
 *
 * @param path path of the CSV file to read
 * @return the rows in file order, one {@code String[]} per non-blank line
 * @throws IOException if the file cannot be opened or read
 */
public static List<String[]> readCsv(String path) throws IOException {
    List<String[]> rows = new ArrayList<>();
    try (BufferedReader reader = new BufferedReader(
            new InputStreamReader(
                new FileInputStream(path), StandardCharsets.UTF_8))) {
        String line;
        while ((line = reader.readLine()) != null) {
            if (line.trim().isEmpty()) {
                continue; // skip blank lines
            }
            // A negative limit keeps trailing empty strings; the one-argument
            // split(",") silently discards them and shortens the row.
            rows.add(line.split(",", -1));
        }
    }
    return rows;
}

/**
 * Reads a UTF-8 CSV file whose fields may be wrapped in double quotes.
 *
 * <p>Inside a quoted field a comma is literal, a doubled quote ({@code ""})
 * stands for one literal quote, and a line break continues the same field
 * (it is stored as {@code '\n'} regardless of the file's line terminator).
 * Blank lines between records are skipped. If the file ends inside an open
 * quote, the text collected so far is returned as the last field.
 *
 * @param path path of the CSV file to read
 * @return the records in file order, one {@code String[]} per record
 * @throws IOException if the file cannot be opened or read
 */
public static List<String[]> readCsvWithQuotes(String path)
        throws IOException {
    List<String[]> rows = new ArrayList<>();
    try (BufferedReader reader = new BufferedReader(
            new InputStreamReader(
                new FileInputStream(path), StandardCharsets.UTF_8))) {
        String line;
        while ((line = reader.readLine()) != null) {
            if (line.trim().isEmpty()) {
                continue; // blank line between records
            }
            List<String> fields = new ArrayList<>();
            StringBuilder field = new StringBuilder();
            boolean inQuotes = false;
            while (true) {
                for (int i = 0; i < line.length(); i++) {
                    char c = line.charAt(i);
                    if (c == '"') {
                        if (inQuotes && i + 1 < line.length()
                                && line.charAt(i + 1) == '"') {
                            // Doubled quote inside a quoted field: one literal quote.
                            field.append('"');
                            i++;
                        } else {
                            inQuotes = !inQuotes;
                        }
                    } else if (c == ',' && !inQuotes) {
                        fields.add(field.toString());
                        field.setLength(0);
                    } else {
                        field.append(c);
                    }
                }
                if (!inQuotes) {
                    break; // record is complete
                }
                // Still inside quotes at end of line: the field spans lines.
                String next = reader.readLine();
                if (next == null) {
                    break; // unterminated quote at end of file
                }
                field.append('\n');
                line = next;
            }
            fields.add(field.toString()); // last field of the record
            rows.add(fields.toArray(new String[0]));
        }
    }
    return rows;
}

写入 CSV 文件

java
/**
 * Writes rows to a UTF-8 file as plain comma-separated lines.
 *
 * <p>Fields are written verbatim with no quoting or escaping, one row per
 * line, each terminated by the platform line separator.
 *
 * @param path path of the file to create or overwrite
 * @param rows rows to write, in order
 * @throws IOException if the file cannot be opened or written
 */
public static void writeCsv(String path, List<String[]> rows)
        throws IOException {
    try (BufferedWriter out = new BufferedWriter(
            new OutputStreamWriter(
                new FileOutputStream(path), StandardCharsets.UTF_8))) {
        for (String[] cells : rows) {
            StringBuilder text = new StringBuilder();
            for (int col = 0; col < cells.length; col++) {
                if (col > 0) {
                    text.append(",");
                }
                text.append(cells[col]);
            }
            out.write(text.toString());
            out.newLine();
        }
    }
}

/**
 * Writes rows to a UTF-8 CSV file, quoting fields that need it.
 *
 * <p>A field is wrapped in double quotes when it contains a comma, a double
 * quote, a line feed or a carriage return; quotes inside such a field are
 * doubled. A {@code null} field is written as an empty field. Each row ends
 * with the platform line separator.
 *
 * @param path path of the file to create or overwrite
 * @param rows rows to write, in order
 * @throws IOException if the file cannot be opened or written
 */
public static void writeCsvWithQuotes(String path, List<String[]> rows)
        throws IOException {
    try (BufferedWriter writer = new BufferedWriter(
            new OutputStreamWriter(
                new FileOutputStream(path), StandardCharsets.UTF_8))) {
        for (String[] row : rows) {
            StringBuilder line = new StringBuilder();
            for (int i = 0; i < row.length; i++) {
                if (i > 0) {
                    line.append(',');
                }
                line.append(quoteCsvField(row[i]));
            }
            writer.write(line.toString());
            writer.newLine();
        }
    }
}

/** Returns the field as it must appear in the file: quoted and escaped when needed. */
private static String quoteCsvField(String field) {
    if (field == null) {
        return "";
    }
    // A bare '\r' must be quoted as well: line-based readers treat it as a
    // line terminator, so leaving it unquoted would split the record.
    boolean needsQuotes = field.contains(",") || field.contains("\"")
            || field.contains("\n") || field.contains("\r");
    if (!needsQuotes) {
        return field;
    }
    return "\"" + field.replace("\"", "\"\"") + "\"";
}

读取 properties 配置文件

java
/**
 * Reads a simple {@code key=value} configuration file encoded as UTF-8.
 *
 * <p>Lines are trimmed first. Blank lines and lines starting with {@code #}
 * or {@code !} are ignored. A line is split at its first {@code =}; lines
 * with no {@code =}, or with nothing before it, are ignored. Keys and values
 * are trimmed, and a later occurrence of a key replaces an earlier one.
 *
 * @param path path of the properties file to read
 * @return the key/value pairs found in the file
 * @throws IOException if the file cannot be opened or read
 */
public static Map<String, String> readProperties(String path)
        throws IOException {
    Map<String, String> result = new HashMap<>();
    try (BufferedReader in = new BufferedReader(
            new InputStreamReader(
                new FileInputStream(path), StandardCharsets.UTF_8))) {
        for (String raw = in.readLine(); raw != null; raw = in.readLine()) {
            String entry = raw.trim();
            boolean blank = entry.isEmpty();
            boolean comment = entry.startsWith("#") || entry.startsWith("!");
            if (blank || comment) {
                continue;
            }
            int separator = entry.indexOf('=');
            if (separator <= 0) {
                continue; // no '=' at all, or an empty key
            }
            String name = entry.substring(0, separator).trim();
            String setting = entry.substring(separator + 1).trim();
            result.put(name, setting);
        }
    }
    return result;
}

/**
 * Writes key/value pairs to a UTF-8 file in {@code key=value} form.
 *
 * <p>The file starts with a {@code # Properties file} comment line and an
 * empty line, followed by one line per map entry in the map's iteration
 * order. Keys and values are written verbatim, without escaping.
 *
 * @param path  path of the file to create or overwrite
 * @param props entries to write
 * @throws IOException if the file cannot be opened or written
 */
public static void writeProperties(String path, Map<String, String> props)
        throws IOException {
    try (BufferedWriter out = new BufferedWriter(
            new OutputStreamWriter(
                new FileOutputStream(path), StandardCharsets.UTF_8))) {
        // Header comment followed by one empty line.
        out.write("# Properties file");
        out.newLine();
        out.newLine();

        for (Map.Entry<String, String> pair : props.entrySet()) {
            String name = pair.getKey();
            String setting = pair.getValue();
            out.write(name);
            out.write("=");
            out.write(setting);
            out.newLine();
        }
    }
}

读取固定宽度格式文件

java
// Fixed-width format: every line holds fields of fixed width, with no delimiter.
// Example: name (8 chars) + age (4 chars) + city (10 chars)
/**
 * Reads a UTF-8 file of fixed-width records.
 *
 * <p>Blank lines are skipped. Each remaining line is cut into consecutive
 * slices of the given widths and every slice is trimmed. A line shorter than
 * the total width yields shortened or empty trailing fields.
 *
 * @param path   path of the file to read
 * @param widths width of each field, in {@code char}s, in column order
 * @return one {@code String[]} of {@code widths.length} fields per non-blank line
 * @throws IOException if the file cannot be opened or read
 */
public static List<String[]> readFixedWidth(String path, int... widths)
        throws IOException {
    List<String[]> records = new ArrayList<>();
    try (BufferedReader in = new BufferedReader(
            new InputStreamReader(
                new FileInputStream(path), StandardCharsets.UTF_8))) {
        for (String text = in.readLine(); text != null; text = in.readLine()) {
            if (text.trim().isEmpty()) {
                continue;
            }
            String[] columns = new String[widths.length];
            int start = 0;
            int column = 0;
            for (int width : widths) {
                // Clamp to the line length so short lines do not overrun.
                int stop = Math.min(start + width, text.length());
                columns[column++] = text.substring(start, stop).trim();
                start = stop;
            }
            records.add(columns);
        }
    }
    return records;
}

// Usage: read "fixed.txt" as three columns of widths 8, 4 and 10
List<String[]> data = readFixedWidth("fixed.txt", 8, 4, 10);

统计文本文件行数、字数、字符数

java
/**
 * Plain holder for the counters of one text file, as filled in by
 * {@code analyzeText}.
 */
public static class TextStats {
    /** Number of lines read from the file. */
    public long lines;
    /** Number of whitespace-separated words, summed over all lines. */
    public long words;
    /** Sum of the lengths of all lines, not counting line terminators. */
    public long chars;
    /** Size of the file on disk, in bytes. */
    public long bytes;
}

/**
 * Counts the lines, words, characters and bytes of a UTF-8 text file.
 *
 * <p>Words are the whitespace-separated tokens of each trimmed line; blank
 * lines contribute none. The character count is the sum of
 * {@code String.length()} over all lines, so it counts UTF-16 code units and
 * excludes line terminators. The byte count is the file size on disk.
 *
 * @param path path of the file to analyze
 * @return the collected counters
 * @throws IOException if the file cannot be opened, read, or sized
 */
public static TextStats analyzeText(String path) throws IOException {
    long lineCount = 0;
    long wordCount = 0;
    long charCount = 0;
    try (BufferedReader in = new BufferedReader(
            new InputStreamReader(
                new FileInputStream(path), StandardCharsets.UTF_8))) {
        for (String text = in.readLine(); text != null; text = in.readLine()) {
            lineCount++;
            charCount += text.length();
            String trimmed = text.trim();
            if (!trimmed.isEmpty()) {
                wordCount += trimmed.split("\\s+").length;
            }
        }
    }
    TextStats result = new TextStats();
    result.lines = lineCount;
    result.words = wordCount;
    result.chars = charCount;
    result.bytes = Files.size(Path.of(path));
    return result;
}

文本文件去重

java
/**
 * Copies a UTF-8 text file, keeping only the first occurrence of each line.
 *
 * <p>The whole source is read before the destination is opened. Lines are
 * written in order of first appearance, each followed by the platform line
 * separator.
 *
 * @param src path of the file to read
 * @param dst path of the file to create or overwrite
 * @throws IOException if either file cannot be opened, read or written
 */
public static void removeDuplicateLines(String src, String dst)
        throws IOException {
    // LinkedHashSet drops repeats while remembering first-seen order.
    Set<String> unique = new LinkedHashSet<>();
    try (BufferedReader in = new BufferedReader(
            new InputStreamReader(
                new FileInputStream(src), StandardCharsets.UTF_8))) {
        for (String text = in.readLine(); text != null; text = in.readLine()) {
            unique.add(text);
        }
    }

    try (BufferedWriter out = new BufferedWriter(
            new OutputStreamWriter(
                new FileOutputStream(dst), StandardCharsets.UTF_8))) {
        for (String text : unique) {
            out.write(text);
            out.newLine();
        }
    }
}

记住这些模式

| 场景 | 核心模式 |
| --- | --- |
| CSV | readLine() → split(",") → 处理引号 |
| Properties | 跳过 # 和空行 → indexOf("=") |
| 固定宽度 | substring(pos, pos + width) |
| 去重 | LinkedHashSet 保持顺序 |

文本解析的核心工具:readLine() + split() + StringBuilder

基于 VitePress 构建