From 61aa0555d60c68120914b4952232f8c6bfc72ed3 Mon Sep 17 00:00:00 2001 From: rootvector2 Date: Fri, 19 Jun 2026 23:39:51 +0530 Subject: [PATCH] escape leading comment marker in printWithEscapes --- .../org/apache/commons/csv/CSVFormat.java | 14 ++++- .../apache/commons/csv/CSVPrinterTest.java | 51 +++++++++++++++++++ 2 files changed, 63 insertions(+), 2 deletions(-) diff --git a/src/main/java/org/apache/commons/csv/CSVFormat.java b/src/main/java/org/apache/commons/csv/CSVFormat.java index 9c403d9e1..eaa8c8ffe 100644 --- a/src/main/java/org/apache/commons/csv/CSVFormat.java +++ b/src/main/java/org/apache/commons/csv/CSVFormat.java @@ -2329,12 +2329,16 @@ private void printWithEscapes(final CharSequence charSeq, final Appendable appen final char escape = getEscapeChar(); final boolean quoteSet = isQuoteCharacterSet(); final char quote = quoteSet ? getQuoteCharacter().charValue() : 0; + final boolean commentMarkerSet = isCommentMarkerSet(); + final char commentChar = commentMarkerSet ? commentMarker.charValue() : 0; // Explicit unboxing is intentional while (pos < end) { char c = charSeq.charAt(pos); final boolean isDelimiterStart = isDelimiter(c, charSeq, pos, delimArray, delimLength); final boolean isCr = c == Constants.CR; final boolean isLf = c == Constants.LF; - if (isCr || isLf || c == escape || quoteSet && c == quote || isDelimiterStart) { + // A leading comment marker would be read back as a comment, so escape it. + final boolean isComment = commentMarkerSet && pos == 0 && c == commentChar; + if (isCr || isLf || c == escape || quoteSet && c == quote || isDelimiterStart || isComment) { // write out segment up until this char if (pos > start) { appendable.append(charSeq, start, pos); @@ -2375,8 +2379,11 @@ private void printWithEscapes(final Reader reader, final Appendable appendable) final char escape = getEscapeChar(); final boolean quoteSet = isQuoteCharacterSet(); final char quote = quoteSet ? getQuoteCharacter().charValue() : 0; + final boolean commentMarkerSet = isCommentMarkerSet(); + final char commentChar = commentMarkerSet ? commentMarker.charValue() : 0; // Explicit unboxing is intentional final StringBuilder builder = new StringBuilder(IOUtils.DEFAULT_BUFFER_SIZE); int c; + boolean firstChar = true; final char[] lookAheadBuffer = new char[delimLength - 1]; while (EOF != (c = bufferedReader.read())) { builder.append((char) c); @@ -2386,7 +2393,10 @@ private void printWithEscapes(final Reader reader, final Appendable appendable) final boolean isDelimiterStart = isDelimiter((char) c, test, pos, delimArray, delimLength); final boolean isCr = c == Constants.CR; final boolean isLf = c == Constants.LF; - if (isCr || isLf || c == escape || quoteSet && c == quote || isDelimiterStart) { + // A leading comment marker would be read back as a comment, so escape it. + final boolean isComment = commentMarkerSet && firstChar && c == commentChar; + firstChar = false; + if (isCr || isLf || c == escape || quoteSet && c == quote || isDelimiterStart || isComment) { // write out segment up until this char if (pos > start) { append(builder.substring(start, pos), appendable); diff --git a/src/test/java/org/apache/commons/csv/CSVPrinterTest.java b/src/test/java/org/apache/commons/csv/CSVPrinterTest.java index e68d4c243..9ae80c1e5 100644 --- a/src/test/java/org/apache/commons/csv/CSVPrinterTest.java +++ b/src/test/java/org/apache/commons/csv/CSVPrinterTest.java @@ -569,6 +569,57 @@ void testEscapeBackslash5() throws IOException { assertEquals("\\\\", sw.toString()); } + @Test + void testEscapeCommentMarkerFirstChar() throws IOException { + // No quoting available in escape mode, so a leading comment marker must be escaped or the + // record reads back as a comment and is dropped. Mirrors the quoting fix for QuoteMode.MINIMAL. + final CSVFormat format = CSVFormat.DEFAULT.builder().setQuote(null).setEscape('\\').setCommentMarker(';').get(); + final StringWriter sw = new StringWriter(); + final String col1 = ";comment-like"; + try (CSVPrinter printer = new CSVPrinter(sw, format)) { + printer.printRecord(col1, "b"); + printer.printRecord(new StringReader(col1), new StringReader("b")); + // The marker past the first character does not start a comment and is left alone. + printer.printRecord("a;b", ";c"); + } + final String string = sw.toString(); + assertEquals("\\;comment-like,b" + RECORD_SEPARATOR + + "\\;comment-like,b" + RECORD_SEPARATOR + + "a;b,\\;c" + RECORD_SEPARATOR, string); + // The emitted records must read back as the original values, none parsed as a comment. + try (CSVParser parser = CSVParser.parse(string, format)) { + final List records = parser.getRecords(); + assertEquals(3, records.size()); + assertEquals(col1, records.get(0).get(0)); + assertEquals("b", records.get(0).get(1)); + assertEquals(col1, records.get(1).get(0)); + assertEquals("b", records.get(1).get(1)); + assertEquals("a;b", records.get(2).get(0)); + assertEquals(";c", records.get(2).get(1)); + } + } + + @Test + void testEscapeCommentMarkerFirstCharWithQuoteModeNone() throws IOException { + final CSVFormat format = CSVFormat.DEFAULT.builder().setEscape('\\').setQuoteMode(QuoteMode.NONE).setCommentMarker(';').get(); + final StringWriter sw = new StringWriter(); + final String col1 = ";bar"; + try (CSVPrinter printer = new CSVPrinter(sw, format)) { + printer.printRecord(col1, "b"); + printer.printRecord(new StringReader(col1), new StringReader("b")); + } + final String string = sw.toString(); + assertEquals("\\;bar,b" + RECORD_SEPARATOR + "\\;bar,b" + RECORD_SEPARATOR, string); + try (CSVParser parser = CSVParser.parse(string, format)) { + final List records = parser.getRecords(); + assertEquals(2, records.size()); + for (final CSVRecord record : records) { + assertEquals(col1, record.get(0)); + assertEquals("b", record.get(1)); + } + } + } + @Test void testEscapeNull1() throws IOException { final StringWriter sw = new StringWriter();