From a0d8eb97272ac8d5b85f580ee77718562a1126d7 Mon Sep 17 00:00:00 2001 From: Ramit Gupta Date: Thu, 26 Mar 2026 16:08:06 +0530 Subject: [PATCH 1/2] TEZ-4699:Add Validation/Canonical checks to avoid path exploitation in CSVResult.java --- .../org/apache/tez/analyzer/CSVResult.java | 63 +++++++---- .../apache/tez/analyzer/TestCSVResult.java | 106 ++++++++++++++++++ 2 files changed, 145 insertions(+), 24 deletions(-) create mode 100644 tez-tools/analyzers/job-analyzer/src/test/java/org/apache/tez/analyzer/TestCSVResult.java diff --git a/tez-tools/analyzers/job-analyzer/src/main/java/org/apache/tez/analyzer/CSVResult.java b/tez-tools/analyzers/job-analyzer/src/main/java/org/apache/tez/analyzer/CSVResult.java index fef2d5eb40..df4e9a7cef 100644 --- a/tez-tools/analyzers/job-analyzer/src/main/java/org/apache/tez/analyzer/CSVResult.java +++ b/tez-tools/analyzers/job-analyzer/src/main/java/org/apache/tez/analyzer/CSVResult.java @@ -20,10 +20,12 @@ import java.io.BufferedWriter; import java.io.File; -import java.io.FileOutputStream; import java.io.IOException; import java.io.OutputStreamWriter; -import java.nio.charset.Charset; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.StandardOpenOption; import java.util.Arrays; import java.util.Collections; import java.util.Comparator; @@ -93,31 +95,44 @@ public void setComments(String comments) { '}'; } - //For testing public void dumpToFile(String fileName) throws IOException { - OutputStreamWriter writer = new OutputStreamWriter( - new FileOutputStream(new File(fileName)), - Charset.forName("UTF-8").newEncoder()); - BufferedWriter bw = new BufferedWriter(writer); - bw.write(Joiner.on(",").join(headers)); - bw.newLine(); - for (String[] record : recordsList) { - - if (record.length != headers.length) { - continue; //LOG error msg? - } - - StringBuilder sb = new StringBuilder(); - for(int i=0;i + * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.tez.analyzer; + + +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.nio.file.FileAlreadyExistsException; +import java.nio.file.Files; +import java.nio.file.Path; + +import org.junit.Assert; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.TemporaryFolder; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Unit tests for {@link CSVResult#dumpToFile(String)} (validation and atomic create). + */ +public class TestCSVResult { + + @Rule + public TemporaryFolder tmpDir = new TemporaryFolder(); + private static final Logger LOG = LoggerFactory.getLogger(TestCSVResult.class); + + @Test + public void testDumpToFileWritesContent() throws Exception { + Path out = tmpDir.newFolder().toPath().resolve("out.csv"); + CSVResult result = new CSVResult(new String[] { "h1", "h2" }); + result.addRecord(new String[] { "a", "b" }); + result.dumpToFile(out.toString()); + LOG.info("output file path is : {}", out.getFileName() ); + String content = new String(Files.readAllBytes(out), StandardCharsets.UTF_8); + Assert.assertEquals("h1,h2\na,b\n", content); + } + + @Test + public void testDumpToFileRejectsExistingFile() throws Exception { + Path out = tmpDir.newFile("existing.csv").toPath(); + CSVResult result = new CSVResult(new String[] { "x" }); + try { + result.dumpToFile(out.toString()); + Assert.fail("Expected FileAlreadyExistsException when output file already exists"); + } catch (FileAlreadyExistsException e) { + // CREATE_NEW must fail if path already exists (atomic exclusive create) + } + } + + @Test + public void testDumpToFileRejectsMissingParentDir() throws Exception { + Path missingParent = tmpDir.getRoot().toPath().resolve("no-such-dir"); + Path out = missingParent.resolve("out.csv"); + CSVResult result = new CSVResult(new String[] { "x" }); + try { + result.dumpToFile(out.toString()); + Assert.fail("Expected IOException when parent directory does not exist"); + } catch (IOException e) { + Assert.assertTrue(e.getMessage().contains("Parent directory does not exist")); + } + } + + @Test(expected = IllegalArgumentException.class) + public void testDumpToFileRejectsNullFileName() throws Exception { + new CSVResult(new String[] { "x" }).dumpToFile(null); + } + + @Test(expected = IllegalArgumentException.class) + public void testDumpToFileRejectsEmptyFileName() throws Exception { + new CSVResult(new String[] { "x" }).dumpToFile(""); + } + + @Test(expected = IllegalArgumentException.class) + public void testDumpToFileRejectsBlankFileName() throws Exception { + new CSVResult(new String[] { "x" }).dumpToFile(" "); + } + + @Test + public void testDumpToFileNestedDirectory() throws Exception { + Path base = tmpDir.newFolder().toPath(); + Path nested = base.resolve("a").resolve("b"); + Files.createDirectories(nested); + Path out = nested.resolve("nested.csv"); + CSVResult result = new CSVResult(new String[] { "h" }); + result.addRecord(new String[] { "r" }); + result.dumpToFile(out.toString()); + Assert.assertEquals("h\nr\n", new String(Files.readAllBytes(out), StandardCharsets.UTF_8)); + } +} From ff575291a8105011f48822eb24e3a50440aaa4c5 Mon Sep 17 00:00:00 2001 From: Ramit Gupta Date: Fri, 27 Mar 2026 18:36:04 +0530 Subject: [PATCH 2/2] TEZ-4699:Add Validation/Canonical checks to avoid path exploitation in CSVResult.java --- .../org/apache/tez/analyzer/CSVResult.java | 28 ++- .../apache/tez/analyzer/TestCSVResult.java | 164 +++++++++++------- 2 files changed, 116 insertions(+), 76 deletions(-) diff --git a/tez-tools/analyzers/job-analyzer/src/main/java/org/apache/tez/analyzer/CSVResult.java b/tez-tools/analyzers/job-analyzer/src/main/java/org/apache/tez/analyzer/CSVResult.java index df4e9a7cef..fbc4275f0b 100644 --- a/tez-tools/analyzers/job-analyzer/src/main/java/org/apache/tez/analyzer/CSVResult.java +++ b/tez-tools/analyzers/job-analyzer/src/main/java/org/apache/tez/analyzer/CSVResult.java @@ -25,6 +25,7 @@ import java.nio.charset.StandardCharsets; import java.nio.file.Files; import java.nio.file.Path; +import java.nio.file.Paths; import java.nio.file.StandardOpenOption; import java.util.Arrays; import java.util.Collections; @@ -71,7 +72,7 @@ public Iterator getRecordsIterator() { return Iterators.unmodifiableIterator(recordsList.iterator()); } - @SuppressWarnings({ "rawtypes", "unchecked" }) + @SuppressWarnings({"rawtypes", "unchecked"}) public void sort(Comparator comparator) { Collections.sort(recordsList, comparator); } @@ -80,15 +81,18 @@ public void setComments(String comments) { this.comments = comments; } - @Override public String toJson() throws TezException { + @Override + public String toJson() throws TezException { return ""; } - @Override public String getComments() { + @Override + public String getComments() { return comments; } - @Override public String toString() { + @Override + public String toString() { return "CSVResult{" + "headers=" + Arrays.toString(headers) + ", recordsList=" + recordsList + @@ -124,15 +128,21 @@ public void dumpToFile(String fileName) throws IOException { private static File validateOutputFile(String fileName) throws IOException { if (fileName == null || fileName.trim().isEmpty()) { - throw new IllegalArgumentException("fileName must not be null or empty"); + throw new IllegalArgumentException("File name must not be null or empty"); } - File target = new File(fileName).getCanonicalFile(); - File parent = target.getParentFile(); + Path baseDir = Paths.get(System.getProperty("user.dir")).toAbsolutePath().normalize(); + Path targetPath = Paths.get(fileName).toAbsolutePath().normalize(); - if (parent == null || !parent.isDirectory()) { + if (!targetPath.startsWith(baseDir)) { + throw new IOException("Path escapes the allowed base directory. Path: " + targetPath + ", Base: " + baseDir); + } + + Path parent = targetPath.getParent(); + if (parent == null || !Files.isDirectory(parent)) { throw new IOException("Parent directory does not exist: " + parent); } - return target; + + return targetPath.toFile(); } } diff --git a/tez-tools/analyzers/job-analyzer/src/test/java/org/apache/tez/analyzer/TestCSVResult.java b/tez-tools/analyzers/job-analyzer/src/test/java/org/apache/tez/analyzer/TestCSVResult.java index cc052e35c5..e1943128d8 100644 --- a/tez-tools/analyzers/job-analyzer/src/test/java/org/apache/tez/analyzer/TestCSVResult.java +++ b/tez-tools/analyzers/job-analyzer/src/test/java/org/apache/tez/analyzer/TestCSVResult.java @@ -18,89 +18,119 @@ package org.apache.tez.analyzer; - import java.io.IOException; import java.nio.charset.StandardCharsets; import java.nio.file.FileAlreadyExistsException; import java.nio.file.Files; import java.nio.file.Path; +import java.nio.file.Paths; +import org.junit.After; import org.junit.Assert; -import org.junit.Rule; +import org.junit.Before; import org.junit.Test; -import org.junit.rules.TemporaryFolder; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; -/** - * Unit tests for {@link CSVResult#dumpToFile(String)} (validation and atomic create). - */ public class TestCSVResult { - @Rule - public TemporaryFolder tmpDir = new TemporaryFolder(); - private static final Logger LOG = LoggerFactory.getLogger(TestCSVResult.class); + private Path testDir; + + @Before + public void setUp() throws IOException { + testDir = Paths.get(System.getProperty("user.dir") + "/test"); + Files.createDirectories(testDir); + } + + @After + public void tearDown() throws IOException { + if (Files.exists(testDir)) { + Files.walk(testDir) + .sorted(java.util.Comparator.reverseOrder()) + .forEach(path -> path.toFile().delete()); + } + } @Test public void testDumpToFileWritesContent() throws Exception { - Path out = tmpDir.newFolder().toPath().resolve("out.csv"); - CSVResult result = new CSVResult(new String[] { "h1", "h2" }); - result.addRecord(new String[] { "a", "b" }); + Path dir = Files.createDirectories(testDir.resolve("test1")); + Path out = dir.resolve("out.csv"); + + CSVResult result = new CSVResult(new String[]{"h1", "h2"}); + result.addRecord(new String[]{"a", "b"}); result.dumpToFile(out.toString()); - LOG.info("output file path is : {}", out.getFileName() ); - String content = new String(Files.readAllBytes(out), StandardCharsets.UTF_8); + + String content = Files.readString(out, StandardCharsets.UTF_8); Assert.assertEquals("h1,h2\na,b\n", content); } - @Test - public void testDumpToFileRejectsExistingFile() throws Exception { - Path out = tmpDir.newFile("existing.csv").toPath(); - CSVResult result = new CSVResult(new String[] { "x" }); - try { - result.dumpToFile(out.toString()); - Assert.fail("Expected FileAlreadyExistsException when output file already exists"); - } catch (FileAlreadyExistsException e) { - // CREATE_NEW must fail if path already exists (atomic exclusive create) - } - } - - @Test - public void testDumpToFileRejectsMissingParentDir() throws Exception { - Path missingParent = tmpDir.getRoot().toPath().resolve("no-such-dir"); - Path out = missingParent.resolve("out.csv"); - CSVResult result = new CSVResult(new String[] { "x" }); - try { - result.dumpToFile(out.toString()); - Assert.fail("Expected IOException when parent directory does not exist"); - } catch (IOException e) { - Assert.assertTrue(e.getMessage().contains("Parent directory does not exist")); - } - } - - @Test(expected = IllegalArgumentException.class) - public void testDumpToFileRejectsNullFileName() throws Exception { - new CSVResult(new String[] { "x" }).dumpToFile(null); - } - - @Test(expected = IllegalArgumentException.class) - public void testDumpToFileRejectsEmptyFileName() throws Exception { - new CSVResult(new String[] { "x" }).dumpToFile(""); - } - - @Test(expected = IllegalArgumentException.class) - public void testDumpToFileRejectsBlankFileName() throws Exception { - new CSVResult(new String[] { "x" }).dumpToFile(" "); - } - - @Test - public void testDumpToFileNestedDirectory() throws Exception { - Path base = tmpDir.newFolder().toPath(); - Path nested = base.resolve("a").resolve("b"); - Files.createDirectories(nested); - Path out = nested.resolve("nested.csv"); - CSVResult result = new CSVResult(new String[] { "h" }); - result.addRecord(new String[] { "r" }); - result.dumpToFile(out.toString()); - Assert.assertEquals("h\nr\n", new String(Files.readAllBytes(out), StandardCharsets.UTF_8)); - } + @Test + public void testDumpToFileRejectsExistingFile() throws Exception { + Path out = testDir.resolve("existing.csv"); + Files.createFile(out); + + CSVResult result = new CSVResult(new String[]{"x"}); + + try { + result.dumpToFile(out.toString()); + Assert.fail("Expected FileAlreadyExistsException when output file already exists"); + } catch (FileAlreadyExistsException e) { + Assert.assertTrue(e.getMessage().contains(out.toString())); + } + } + + @Test + public void testDumpToFileRejectsMissingParentDir() throws Exception { + Path missingParent = testDir.resolve("no-such-dir"); + Path out = missingParent.resolve("out.csv"); + + CSVResult result = new CSVResult(new String[]{"x"}); + + try { + result.dumpToFile(out.toString()); + Assert.fail("Expected IOException when parent directory does not exist"); + } catch (IOException e) { + Assert.assertTrue(e.getMessage().contains("Parent directory does not exist")); + } + } + + @Test(expected = IllegalArgumentException.class) + public void testDumpToFileRejectsNullFileName() throws Exception { + new CSVResult(new String[]{"x"}).dumpToFile(null); + } + + @Test(expected = IllegalArgumentException.class) + public void testDumpToFileRejectsEmptyFileName() throws Exception { + new CSVResult(new String[]{"x"}).dumpToFile(""); + } + + @Test(expected = IllegalArgumentException.class) + public void testDumpToFileRejectsBlankFileName() throws Exception { + new CSVResult(new String[]{"x"}).dumpToFile(" "); + } + + @Test + public void testDumpToFileNestedDirectory() throws Exception { + Path nested = testDir.resolve("a").resolve("b"); + Files.createDirectories(nested); + + Path out = nested.resolve("nested.csv"); + + CSVResult result = new CSVResult(new String[]{"h"}); + result.addRecord(new String[]{"r"}); + result.dumpToFile(out.toString()); + + Assert.assertEquals("h\nr\n", Files.readString(out, StandardCharsets.UTF_8)); + } + + @Test + public void testDumpToFileNoWriteUpwardDirPath() throws Exception { + String relativePath = testDir + "/../../out.csv"; + CSVResult result = new CSVResult(new String[]{"h"}); + + try { + result.dumpToFile(relativePath); + Assert.fail("Expected IOException due to moving upwards from pwd"); + } catch (IOException e) { + Assert.assertTrue(e.getMessage().contains(Paths.get(relativePath).toAbsolutePath().normalize().toString())); + } + } }