apache · andygrove · Apr 21, 2026 · Apr 21, 2026 · Apr 21, 2026 · Apr 21, 2026
diff --git a/.github/actions/java-test/action.yaml b/.github/actions/java-test/action.yaml
@@ -29,10 +29,6 @@ inputs:
     description: 'Maven options passed to the mvn command'
     required: false
     default: ''
-  scan_impl:
-    description: 'The default Parquet scan implementation'
-    required: false
-    default: 'auto'
   upload-test-reports:
     description: 'Whether to upload test results including coverage to GitHub'
     required: false
@@ -72,7 +68,6 @@ runs:
       shell: bash
       if: ${{ inputs.suites == '' }}
       env:
-        COMET_PARQUET_SCAN_IMPL: ${{ inputs.scan_impl }}
         SPARK_LOCAL_HOSTNAME: "localhost"
         SPARK_LOCAL_IP: "127.0.0.1"
       run: |
@@ -81,7 +76,6 @@ runs:
       shell: bash
       if: ${{ inputs.suites != '' }}
       env:
-        COMET_PARQUET_SCAN_IMPL: ${{ inputs.scan_impl }}
         SPARK_LOCAL_HOSTNAME: "localhost"
         SPARK_LOCAL_IP: "127.0.0.1"
       run: |

diff --git a/.github/workflows/pr_build_linux.yml b/.github/workflows/pr_build_linux.yml
@@ -242,27 +242,22 @@ jobs:
           - name: "Spark 3.4, JDK 11, Scala 2.12"
             java_version: "11"
             maven_opts: "-Pspark-3.4 -Pscala-2.12"
-            scan_impl: "auto"
 
           - name: "Spark 3.5.5, JDK 17, Scala 2.13"
             java_version: "17"
             maven_opts: "-Pspark-3.5 -Dspark.version=3.5.5 -Pscala-2.13"
-            scan_impl: "auto"
 
           - name: "Spark 3.5.6, JDK 17, Scala 2.13"
             java_version: "17"
             maven_opts: "-Pspark-3.5 -Dspark.version=3.5.6 -Pscala-2.13"
-            scan_impl: "auto"
 
           - name: "Spark 3.5, JDK 17, Scala 2.12"
             java_version: "17"
             maven_opts: "-Pspark-3.5 -Pscala-2.12"
-            scan_impl: "native_iceberg_compat"
 
           - name: "Spark 4.0, JDK 17"
             java_version: "17"
             maven_opts: "-Pspark-4.0"
-            scan_impl: "auto"
         suite:
           - name: "fuzz"
             value: |
@@ -347,7 +342,7 @@ jobs:
             value: |
               org.apache.spark.sql.CometToPrettyStringSuite
       fail-fast: false
-    name: ${{ matrix.profile.name }}/${{ matrix.profile.scan_impl }} [${{ matrix.suite.name }}]
+    name: ${{ matrix.profile.name }} [${{ matrix.suite.name }}]
     runs-on: ${{ github.repository_owner == 'apache' && format('runs-on={0},family=m8a+m7a+c8a,cpu=16,image=ubuntu24-full-x64,extras=s3-cache,disk=large,tag=datafusion-comet', github.run_id) || 'ubuntu-latest' }}
     container:
       image: amd64/rust
@@ -385,10 +380,9 @@ jobs:
       - name: Java test steps
         uses: ./.github/actions/java-test
         with:
-          artifact_name: ${{ matrix.profile.name }}-${{ matrix.suite.name }}-${{ github.run_id }}-${{ github.run_number }}-${{ github.run_attempt }}-${{ matrix.profile.scan_impl }}
+          artifact_name: ${{ matrix.profile.name }}-${{ matrix.suite.name }}-${{ github.run_id }}-${{ github.run_number }}-${{ github.run_attempt }}
           suites: ${{ matrix.suite.name == 'sql' && matrix.profile.name == 'Spark 3.4, JDK 11, Scala 2.12' && '' || matrix.suite.value }}
           maven_opts: ${{ matrix.profile.maven_opts }}
-          scan_impl: ${{ matrix.profile.scan_impl }}
           upload-test-reports: true
           skip-native-build: true
 

diff --git a/.github/workflows/spark_sql_test.yml b/.github/workflows/spark_sql_test.yml
@@ -131,18 +131,16 @@ jobs:
           - {name: "sql_hive-1", args1: "", args2: "hive/testOnly * -- -l org.apache.spark.tags.ExtendedHiveTest -l org.apache.spark.tags.SlowHiveTest"}
           - {name: "sql_hive-2", args1: "", args2: "hive/testOnly * -- -n org.apache.spark.tags.ExtendedHiveTest"}
           - {name: "sql_hive-3", args1: "", args2: "hive/testOnly * -- -n org.apache.spark.tags.SlowHiveTest"}
-        # Since 4f5eaf0, auto mode uses native_datafusion for V1 scans,
-        # so we only need to test with auto.
         config:
-          - {spark-short: '3.4', spark-full: '3.4.3', java: 11, scan-impl: 'auto'}
-          - {spark-short: '3.5', spark-full: '3.5.8', java: 11, scan-impl: 'auto'}
-          - {spark-short: '4.0', spark-full: '4.0.1', java: 17, scan-impl: 'auto'}
+          - {spark-short: '3.4', spark-full: '3.4.3', java: 11}
+          - {spark-short: '3.5', spark-full: '3.5.8', java: 11}
+          - {spark-short: '4.0', spark-full: '4.0.1', java: 17}
         # Skip sql_hive-1 for Spark 4.0 due to https://github.com/apache/datafusion-comet/issues/2946
         exclude:
-          - config: {spark-short: '4.0', spark-full: '4.0.1', java: 17, scan-impl: 'auto'}
+          - config: {spark-short: '4.0', spark-full: '4.0.1', java: 17}
             module: {name: "sql_hive-1", args1: "", args2: "hive/testOnly * -- -l org.apache.spark.tags.ExtendedHiveTest -l org.apache.spark.tags.SlowHiveTest"}
       fail-fast: false
-    name: spark-sql-${{ matrix.config.scan-impl }}-${{ matrix.module.name }}/spark-${{ matrix.config.spark-full }}
+    name: spark-sql-${{ matrix.module.name }}/spark-${{ matrix.config.spark-full }}
     runs-on: ${{ matrix.os }}
     container:
       image: amd64/rust
@@ -168,7 +166,7 @@ jobs:
         run: |
           cd apache-spark
           rm -rf /root/.m2/repository/org/apache/parquet # somehow parquet cache requires cleanups
-          NOLINT_ON_COMPILE=true ENABLE_COMET=true ENABLE_COMET_ONHEAP=true COMET_PARQUET_SCAN_IMPL=${{ matrix.config.scan-impl }} ENABLE_COMET_LOG_FALLBACK_REASONS=${{ github.event.inputs.collect-fallback-logs || 'false' }} \
+          NOLINT_ON_COMPILE=true ENABLE_COMET=true ENABLE_COMET_ONHEAP=true ENABLE_COMET_LOG_FALLBACK_REASONS=${{ github.event.inputs.collect-fallback-logs || 'false' }} \
             build/sbt -Dsbt.log.noformat=true ${{ matrix.module.args1 }} "${{ matrix.module.args2 }}"
           if [ "${{ github.event.inputs.collect-fallback-logs }}" = "true" ]; then
             find . -type f -name "unit-tests.log" -print0 | xargs -0 grep -h "Comet cannot accelerate" | sed 's/.*Comet cannot accelerate/Comet cannot accelerate/' | sort -u > fallback.log
@@ -179,7 +177,7 @@ jobs:
         if: ${{ github.event.inputs.collect-fallback-logs == 'true' }}
         uses: actions/upload-artifact@v7
         with:
-          name: fallback-log-spark-sql-${{ matrix.config.scan-impl }}-${{ matrix.module.name }}-spark-${{ matrix.config.spark-full }}
+          name: fallback-log-spark-sql-${{ matrix.module.name }}-spark-${{ matrix.config.spark-full }}
           path: "**/fallback.log"
 
   merge-fallback-logs:

diff --git a/.github/workflows/spark_sql_test_native_iceberg_compat.yml b/.github/workflows/spark_sql_test_native_iceberg_compat.yml
diff --git a/common/src/main/scala/org/apache/comet/CometConf.scala b/common/src/main/scala/org/apache/comet/CometConf.scala
@@ -114,22 +114,23 @@ object CometConf extends ShimCometConf {
       .booleanConf
       .createWithEnvVarOrDefault("ENABLE_COMET_WRITE", false)
 
+  @deprecated
   val SCAN_NATIVE_DATAFUSION = "native_datafusion"
+
+  @deprecated
   val SCAN_NATIVE_ICEBERG_COMPAT = "native_iceberg_compat"
+
+  @deprecated
   val SCAN_AUTO = "auto"
 
+  @deprecated
   val COMET_NATIVE_SCAN_IMPL: ConfigEntry[String] = conf("spark.comet.scan.impl")
-    .category(CATEGORY_PARQUET)
-    .doc(
-      "The implementation of Comet's Parquet scan to use. Available scans are " +
-        s"`$SCAN_NATIVE_DATAFUSION`, and `$SCAN_NATIVE_ICEBERG_COMPAT`. " +
-        s"`$SCAN_NATIVE_DATAFUSION` is a fully native implementation, and " +
-        s"`$SCAN_NATIVE_ICEBERG_COMPAT` is a hybrid implementation that supports some " +
-        "additional features, such as row indexes and field ids. " +
-        s"`$SCAN_AUTO` (default) chooses the best available scan based on the scan schema.")
+    .category(CATEGORY_TESTING)
+    .internal()
+    .doc("This configuration option is deprecated and has no effect on Comet behavior.")
     .stringConf
     .transform(_.toLowerCase(Locale.ROOT))
     .checkValues(Set(SCAN_NATIVE_DATAFUSION, SCAN_NATIVE_ICEBERG_COMPAT, SCAN_AUTO))
     .createWithEnvVarOrDefault("COMET_PARQUET_SCAN_IMPL", SCAN_AUTO)
 
   val COMET_ICEBERG_NATIVE_ENABLED: ConfigEntry[Boolean] =

diff --git a/docs/source/contributor-guide/bug_triage.md b/docs/source/contributor-guide/bug_triage.md
@@ -73,8 +73,7 @@ help contributors find bugs in their area of expertise.
 | `area:ffi`         | Arrow FFI / JNI boundary                  |
 | `area:ci`          | CI/CD, GitHub Actions, build tooling      |
 
-The following pre-existing labels also serve as area indicators: `native_datafusion`,
-`native_iceberg_compat`, `spark 4`, `spark sql tests`.
+The following pre-existing labels also serve as area indicators: `spark 4`, `spark sql tests`.
 
 ## Triage Process
 
@@ -109,9 +108,8 @@ Periodically review open bugs to ensure priorities are still accurate:
    crashes, because crashes are at least visible.
 2. **User-reported over test-only.** A bug hit by a real user on a real workload takes priority
    over one found only in test suites.
-3. **Core path over experimental.** Bugs in the default scan mode (`native_comet`) or widely-used
-   expressions take priority over bugs in experimental features like `native_datafusion` or
-   `native_iceberg_compat`.
+3. **Core path over experimental.** Bugs in widely used expressions and operators take priority over
+   bugs in experimental features.
 4. **Production safety over feature completeness.** Fixing a data corruption bug is more important
    than adding support for a new expression.