diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index 399f7ef..1b107d0 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -60,11 +60,41 @@ jobs: # Merge all these results together python scripts/merge_data_files.py out.yaml eessi*.yaml mv out.yaml docs/data/eessi_software_metadata.yaml + + # Also do RISC-V repository + rm eessi*.yaml + # Need to set environment variables to load EESSI module for RISC-V + ( + module purge + export EESSI_DEBUG_INIT=1 + export EESSI_VERSION_OVERRIDE=2025.06-001 + export EESSI_ARCHDETECT_OPTIONS_OVERRIDE=riscv64/generic + export EESSI_ACCELERATOR_TARGET_OVERRIDE="doesnotexist" + # Need to fake uname to convince EESSI we are on a RISC-V system + uname() { echo "riscv64"; } + export -f uname + module load EESSI/2025.06 + # Can't run RISC-V binaries from compat layer so remove them + export PATH=$(echo "$PATH" | /usr/bin/tr ':' '\n' | /usr/bin/grep -v '^/cvmfs' | /usr/bin/paste -sd ':' -) + module load EasyBuild/5 + module load EESSI-extend + python scripts/generate_data_files.py --eessi-version 2025.06-001 --repository "dev.eessi.io/riscv" + ) + python scripts/merge_data_files.py out_riscv.yaml eessi*.yaml + mv out_riscv.yaml docs/data/eessi_software_metadata-riscv.yaml + # Generate json data files and markdown index/description for them cd docs/data python ../../scripts/process_eessi_software_metadata.py eessi_software_metadata.yaml eessi_api_metadata + python ../../scripts/process_eessi_software_metadata.py eessi_software_metadata-riscv.yaml eessi_api_metadata-riscv python ../../scripts/calculate_hashes.py - for json_file in *.json; do + for json_file in eessi_api_metadata_*.json; do + python ../../scripts/generate_schema_md.py $json_file >> index.md + done + echo "" >> index.md + echo "### RISC-V datafiles" >> index.md + echo "" >> index.md + for json_file in eessi_api_metadata-riscv_*.json; do python ../../scripts/generate_schema_md.py $json_file >> index.md done - run: | diff --git a/.github/workflows/prs.yml b/.github/workflows/prs.yml index da0d590..25ca3ce 100644 --- a/.github/workflows/prs.yml +++ b/.github/workflows/prs.yml @@ -34,16 +34,52 @@ jobs: export EESSI_ACCELERATOR_TARGET_OVERRIDE="accel/nvidia/cc90" export EESSI_OVERRIDE_GPU_CHECK=1 # Only do 2023.06 for EB 5 since this is just a test - ( module load EESSI/2023.06 && module load EasyBuild/5 && module load EESSI-extend && python scripts/generate_data_files.py --eessi-version=2023.06 ) & + ( + module purge + module load EESSI/2023.06 + module load EasyBuild/5 + module load EESSI-extend + python scripts/generate_data_files.py --eessi-version=2023.06 + ) & # Merge all these results together wait python scripts/merge_data_files.py out.yaml eessi*.yaml mv out.yaml docs/data/eessi_software_metadata.yaml + + # Also test RISC-V + rm eessi*.yaml + # Need to set environment variables to load EESSI module for RISC-V + ( + module purge + export EESSI_DEBUG_INIT=1 + export EESSI_VERSION_OVERRIDE=2025.06-001 + export EESSI_ARCHDETECT_OPTIONS_OVERRIDE=riscv64/generic + export EESSI_ACCELERATOR_TARGET_OVERRIDE="doesnotexist" + # Need to fake uname to convince EESSI we are on a RISC-V system + uname() { echo "riscv64"; } + export -f uname + module load EESSI/2025.06 + # Can't run RISC-V binaries from compat layer so remove them + export PATH=$(echo "$PATH" | /usr/bin/tr ':' '\n' | /usr/bin/grep -v '^/cvmfs' | /usr/bin/paste -sd ':' -) + module load EasyBuild/5 + module load EESSI-extend + python scripts/generate_data_files.py --eessi-version 2025.06-001 --repository "dev.eessi.io/riscv" + ) + python scripts/merge_data_files.py out_riscv.yaml eessi*.yaml + mv out_riscv.yaml docs/data/eessi_software_metadata-riscv.yaml + # Generate json data files and markdown index/description for them cd docs/data python ../../scripts/process_eessi_software_metadata.py eessi_software_metadata.yaml eessi_api_metadata + python ../../scripts/process_eessi_software_metadata.py eessi_software_metadata-riscv.yaml eessi_api_metadata-riscv python ../../scripts/calculate_hashes.py - for json_file in *.json; do + for json_file in eessi_api_metadata_*.json; do + python ../../scripts/generate_schema_md.py $json_file >> index.md + done + echo "" >> index.md + echo "### RISC-V datafiles" >> index.md + echo "" >> index.md + for json_file in eessi_api_metadata-riscv*.json; do python ../../scripts/generate_schema_md.py $json_file >> index.md done - name: Test building the website @@ -54,4 +90,4 @@ jobs: uses: actions/upload-artifact@v4 with: name: eessi-api-metadata - path: docs/data/eessi_api_metadata_software.json + path: docs/data/eessi_api_metadata*software*.json diff --git a/scripts/generate_data_files.py b/scripts/generate_data_files.py index b16e405..852fb75 100644 --- a/scripts/generate_data_files.py +++ b/scripts/generate_data_files.py @@ -18,7 +18,16 @@ from easybuild.tools.include import include_easyblocks from contextlib import contextmanager -VALID_EESSI_VERSIONS = ["2025.06", "2023.06"] +SUPPORTED_REPOSITORIES = { + 'software.eessi.io': ["2025.06", "2023.06"], + 'dev.eessi.io/riscv': ["2025.06-001"], +} + +VALID_EESSI_VERSIONS = [ + version + for versions in SUPPORTED_REPOSITORIES.values() + for version in versions +] # Give order to my toolchains so I can easily figure out what "latest" means EESSI_SUPPORTED_TOP_LEVEL_TOOLCHAINS = OrderedDict( @@ -205,20 +214,31 @@ def merge_dicts(d1, d2): "-e", required=True, choices=VALID_EESSI_VERSIONS, - help=f"Allowed versions: {', '.join(VALID_EESSI_VERSIONS)}", + help=f"Allowed versions (also dependent on repository): {', '.join(VALID_EESSI_VERSIONS)}", + ) + parser.add_argument( + "--repository", + "-r", + default="software.eessi.io", + choices=SUPPORTED_REPOSITORIES.keys(), + help=f"Repository to scan: {', '.join(SUPPORTED_REPOSITORIES)} (default: %(default)s)", ) args = parser.parse_args() eessi_version = args.eessi_version + repository = args.repository + + if eessi_version not in SUPPORTED_REPOSITORIES[repository]: + raise ValueError(f"You must choose an EESSI version supported by the repository: {SUPPORTED_REPOSITORIES[repository]}") print(f"Using EESSI version: {eessi_version}") # We use a single architecture path to gather information about the software versions eessi_reference_architecture = os.getenv("EESSI_ARCHDETECT_OPTIONS_OVERRIDE", False) if not eessi_reference_architecture: - print("You must have selected a CPU architecture via EESSI_ARCHDETECT_OPTIONS_OVERRIDE") + print("You must have selected a CPU architecture via EESSI_ARCHDETECT_OPTIONS_OVERRIDE environment variable") exit() - base_path = f"/cvmfs/software.eessi.io/versions/{eessi_version}/software/linux/{eessi_reference_architecture}" + base_path = f"/cvmfs/{repository}/versions/{eessi_version}/software/linux/{eessi_reference_architecture}" cpu_easyconfig_files_dict = collect_eb_files(os.path.join(base_path, "software")) # We also gather all the acclerator installations for NVIDIA-enabled packages # We're not typically running this script on a node with a GPU so an override must have been set @@ -243,7 +263,8 @@ def merge_dicts(d1, d2): # Store the toolchain hierarchies supported by the EESSI version eessi_software["eessi_version"][eessi_version]["toolchain_hierarchy"] = {} - for top_level_toolchain in EESSI_SUPPORTED_TOP_LEVEL_TOOLCHAINS[eessi_version]: + # RISC-V versions have a stub like -001 at the end, make sure to drop it + for top_level_toolchain in EESSI_SUPPORTED_TOP_LEVEL_TOOLCHAINS[eessi_version.split("-")[0]]: # versions are typically 2024a/2024b etc. for top level toolchains # so let's use that to make sorting easy toolchain_family = f"{top_level_toolchain['version']}_{top_level_toolchain['name']}" diff --git a/scripts/process_eessi_software_metadata.py b/scripts/process_eessi_software_metadata.py index e2042a2..30b2bc7 100644 --- a/scripts/process_eessi_software_metadata.py +++ b/scripts/process_eessi_software_metadata.py @@ -24,6 +24,10 @@ "x86_64/intel/cascadelake", ] +RISCV_ARCHITECTURES = [ + "riscv64/generic", +] + NVIDIA_ARCHITECTURES = [ "accel/nvidia/cc70", "accel/nvidia/cc80", @@ -63,7 +67,11 @@ def get_software_information_by_filename(file_metadata, original_path=None, tool # 1) Detect the architecture substring inside the path base_version_dict["cpu_arch"] = [] detected_arch = None - for arch in ARCHITECTURES: + if '/riscv64/' in original_path: + architecture_group = RISCV_ARCHITECTURES + else: + architecture_group = ARCHITECTURES + for arch in architecture_group: if f"/{arch}/" in original_path: detected_arch = arch break @@ -89,11 +97,14 @@ def get_software_information_by_filename(file_metadata, original_path=None, tool spider_cache = before_arch + detected_arch + "/.lmod/cache/spiderT.lua" # 3) Substitute each architecture and test module file existence in spider cache - for arch in ARCHITECTURES: + for arch in architecture_group: substituted_modulefile = modulefile.replace(detected_arch, arch) substituted_spider_cache = spider_cache.replace(detected_arch, arch) - # os.path.exists is very expensive for CVMFS so we just look for the file in the spider cache - found = subprocess.run(["grep", "-q", substituted_modulefile, substituted_spider_cache]).returncode == 0 + # os.path.exists is very expensive for CVMFS so we just look for the file in the spider cache (if we can) + if os.path.exists(substituted_spider_cache): + found = subprocess.run(["grep", "-q", substituted_modulefile, substituted_spider_cache]).returncode == 0 + else: + found = os.path.exists(substituted_modulefile) if found: base_version_dict["cpu_arch"].append(arch) # If we have an accelerator module let's check which architectures are supported @@ -106,7 +117,15 @@ def get_software_information_by_filename(file_metadata, original_path=None, tool # Let's not include the "accel/" part of the accel_arch base_version_dict["gpu_arch"][arch].append(accel_arch.replace("accel/", "", 1)) else: - print(f"No module {accel_substituted_modulefile}...not adding software for architecture {arch}/{accel_arch}") + # Let's not be too noisy here, we know we don't have some CUDA archs in 2023.06 + if not ( + accel_substituted_modulefile.startswith('/cvmfs/software.eessi.io/versions/2023.06') + and accel_arch in ["accel/nvidia/cc100", "accel/nvidia/cc120"] + ): + print( + f"No module {accel_substituted_modulefile}... " + f"not adding software for architecture {arch}/{accel_arch}" + ) continue else: print(f"No module {substituted_modulefile}...not adding software for architecture {arch}") @@ -368,7 +387,7 @@ def main(): base_json_metadata["architectures_map"] = {} for eessi_version in eessi_versions: base_json_metadata["architectures_map"][eessi_version] = {} - for architecture in ARCHITECTURES: + for architecture in ARCHITECTURES + RISCV_ARCHITECTURES: base_json_metadata["architectures_map"][eessi_version][architecture] = architecture base_json_metadata["gpu_architectures_map"] = {} base_json_metadata["category_details"] = {}