Skip to content
32 changes: 31 additions & 1 deletion .github/workflows/docs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -60,11 +60,41 @@ jobs:
# Merge all these results together
python scripts/merge_data_files.py out.yaml eessi*.yaml
mv out.yaml docs/data/eessi_software_metadata.yaml

# Also do RISC-V repository
# The eessi*.yaml files still present were already merged into out.yaml above;
# remove them so the merge below only sees files created from here on.
rm eessi*.yaml
# Need to set environment variables to load EESSI module for RISC-V.
# Everything runs in a subshell so the overrides, the fake uname and the
# PATH change stay local to it.
(
module purge
# NOTE(review): presumably turns on debug output while the EESSI module initialises - confirm
export EESSI_DEBUG_INIT=1
export EESSI_VERSION_OVERRIDE=2025.06-001
export EESSI_ARCHDETECT_OPTIONS_OVERRIDE=riscv64/generic
# NOTE(review): "doesnotexist" looks like a deliberate placeholder so that no accelerator target is selected - confirm
export EESSI_ACCELERATOR_TARGET_OVERRIDE="doesnotexist"
# Need to fake uname to convince EESSI we are on a RISC-V system.
# The function prints "riscv64" whatever arguments it is given, and
# export -f makes it visible to child bash processes as well.
uname() { echo "riscv64"; }
export -f uname
module load EESSI/2025.06
# Can't run RISC-V binaries from compat layer so remove them:
# rebuild PATH without any entry that starts with /cvmfs. The helper tools
# are called via /usr/bin so they are found regardless of the current PATH.
export PATH=$(echo "$PATH" | /usr/bin/tr ':' '\n' | /usr/bin/grep -v '^/cvmfs' | /usr/bin/paste -sd ':' -)
module load EasyBuild/5
module load EESSI-extend
# generate_data_files.py rejects a version that is not listed for the chosen repository
python scripts/generate_data_files.py --eessi-version 2025.06-001 --repository "dev.eessi.io/riscv"
)
# Merge the files generated above into one RISC-V metadata file and move it into docs/data
python scripts/merge_data_files.py out_riscv.yaml eessi*.yaml
mv out_riscv.yaml docs/data/eessi_software_metadata-riscv.yaml

# Generate json data files and markdown index/description for them
cd docs/data
python ../../scripts/process_eessi_software_metadata.py eessi_software_metadata.yaml eessi_api_metadata
python ../../scripts/process_eessi_software_metadata.py eessi_software_metadata-riscv.yaml eessi_api_metadata-riscv
python ../../scripts/calculate_hashes.py
for json_file in *.json; do
# Append the generate_schema_md.py output for every main datafile to index.md.
# The expansion is quoted so each filename is always passed as one argument.
for json_file in eessi_api_metadata_*.json; do
  python ../../scripts/generate_schema_md.py "$json_file" >> index.md
done
# Heading, surrounded by blank lines, that introduces the RISC-V datafiles
{
  echo ""
  echo "### RISC-V datafiles"
  echo ""
} >> index.md
# Same as above for the RISC-V datafiles
for json_file in eessi_api_metadata-riscv_*.json; do
  python ../../scripts/generate_schema_md.py "$json_file" >> index.md
done
- run: |
Expand Down
42 changes: 39 additions & 3 deletions .github/workflows/prs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -34,16 +34,52 @@ jobs:
export EESSI_ACCELERATOR_TARGET_OVERRIDE="accel/nvidia/cc90"
export EESSI_OVERRIDE_GPU_CHECK=1
# Only do 2023.06 for EB 5 since this is just a test
( module load EESSI/2023.06 && module load EasyBuild/5 && module load EESSI-extend && python scripts/generate_data_files.py --eessi-version=2023.06 ) &
# Generate the 2023.06 data in a background subshell (note the trailing &);
# module changes made inside do not affect the rest of this step.
(
module purge
module load EESSI/2023.06
module load EasyBuild/5
module load EESSI-extend
python scripts/generate_data_files.py --eessi-version=2023.06
) &
# Merge all these results together
# Block until every background job has finished before merging.
# NOTE(review): a bare `wait` returns success even if a background job failed,
# so a failed generation is not detected here; wait on the individual PIDs if
# a failure should stop the step.
wait
python scripts/merge_data_files.py out.yaml eessi*.yaml
mv out.yaml docs/data/eessi_software_metadata.yaml

# Also test RISC-V
# The eessi*.yaml files still present were already merged into out.yaml above;
# remove them so the merge below only sees files created from here on.
rm eessi*.yaml
# Need to set environment variables to load EESSI module for RISC-V.
# Everything runs in a subshell so the overrides, the fake uname and the
# PATH change stay local to it.
(
module purge
# NOTE(review): presumably turns on debug output while the EESSI module initialises - confirm
export EESSI_DEBUG_INIT=1
export EESSI_VERSION_OVERRIDE=2025.06-001
export EESSI_ARCHDETECT_OPTIONS_OVERRIDE=riscv64/generic
# NOTE(review): "doesnotexist" looks like a deliberate placeholder so that no accelerator target is selected - confirm
export EESSI_ACCELERATOR_TARGET_OVERRIDE="doesnotexist"
# Need to fake uname to convince EESSI we are on a RISC-V system.
# The function prints "riscv64" whatever arguments it is given, and
# export -f makes it visible to child bash processes as well.
uname() { echo "riscv64"; }
export -f uname
module load EESSI/2025.06
# Can't run RISC-V binaries from compat layer so remove them:
# rebuild PATH without any entry that starts with /cvmfs. The helper tools
# are called via /usr/bin so they are found regardless of the current PATH.
export PATH=$(echo "$PATH" | /usr/bin/tr ':' '\n' | /usr/bin/grep -v '^/cvmfs' | /usr/bin/paste -sd ':' -)
module load EasyBuild/5
module load EESSI-extend
# generate_data_files.py rejects a version that is not listed for the chosen repository
python scripts/generate_data_files.py --eessi-version 2025.06-001 --repository "dev.eessi.io/riscv"
)
# Merge the files generated above into one RISC-V metadata file and move it into docs/data
python scripts/merge_data_files.py out_riscv.yaml eessi*.yaml
mv out_riscv.yaml docs/data/eessi_software_metadata-riscv.yaml

# Generate json data files and markdown index/description for them
cd docs/data
python ../../scripts/process_eessi_software_metadata.py eessi_software_metadata.yaml eessi_api_metadata
python ../../scripts/process_eessi_software_metadata.py eessi_software_metadata-riscv.yaml eessi_api_metadata-riscv
python ../../scripts/calculate_hashes.py
for json_file in *.json; do
# Append the generate_schema_md.py output for every main datafile to index.md.
# The expansion is quoted so each filename is always passed as one argument.
for json_file in eessi_api_metadata_*.json; do
  python ../../scripts/generate_schema_md.py "$json_file" >> index.md
done
# Heading, surrounded by blank lines, that introduces the RISC-V datafiles
{
  echo ""
  echo "### RISC-V datafiles"
  echo ""
} >> index.md
# Same as above for the RISC-V datafiles.
# NOTE(review): docs.yml uses the stricter glob eessi_api_metadata-riscv_*.json
# for this loop; consider aligning the two workflows.
for json_file in eessi_api_metadata-riscv*.json; do
  python ../../scripts/generate_schema_md.py "$json_file" >> index.md
done
- name: Test building the website
Expand All @@ -54,4 +90,4 @@ jobs:
uses: actions/upload-artifact@v4
with:
name: eessi-api-metadata
path: docs/data/eessi_api_metadata_software.json
path: docs/data/eessi_api_metadata*software*.json
31 changes: 26 additions & 5 deletions scripts/generate_data_files.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,16 @@
from easybuild.tools.include import include_easyblocks
from contextlib import contextmanager

VALID_EESSI_VERSIONS = ["2025.06", "2023.06"]
# EESSI versions that can be scanned, keyed by the repository (below /cvmfs) that provides them
SUPPORTED_REPOSITORIES = {
    "software.eessi.io": ["2025.06", "2023.06"],
    "dev.eessi.io/riscv": ["2025.06-001"],
}

# Every version of every repository as one flat list, in declaration order
VALID_EESSI_VERSIONS = sum(SUPPORTED_REPOSITORIES.values(), [])

# Give order to my toolchains so I can easily figure out what "latest" means
EESSI_SUPPORTED_TOP_LEVEL_TOOLCHAINS = OrderedDict(
Expand Down Expand Up @@ -205,20 +214,31 @@ def merge_dicts(d1, d2):
"-e",
required=True,
choices=VALID_EESSI_VERSIONS,
help=f"Allowed versions: {', '.join(VALID_EESSI_VERSIONS)}",
help=f"Allowed versions (also dependent on repository): {', '.join(VALID_EESSI_VERSIONS)}",
)
parser.add_argument(
"--repository",
"-r",
default="software.eessi.io",
choices=SUPPORTED_REPOSITORIES.keys(),
help=f"Repository to scan: {', '.join(SUPPORTED_REPOSITORIES)} (default: %(default)s)",
)

args = parser.parse_args()
eessi_version = args.eessi_version
repository = args.repository

if eessi_version not in SUPPORTED_REPOSITORIES[repository]:
raise ValueError(f"You must choose an EESSI version supported by the repository: {SUPPORTED_REPOSITORIES[repository]}")

print(f"Using EESSI version: {eessi_version}")

# We use a single architecture path to gather information about the software versions
eessi_reference_architecture = os.getenv("EESSI_ARCHDETECT_OPTIONS_OVERRIDE", False)
if not eessi_reference_architecture:
print("You must have selected a CPU architecture via EESSI_ARCHDETECT_OPTIONS_OVERRIDE")
print("You must have selected a CPU architecture via EESSI_ARCHDETECT_OPTIONS_OVERRIDE environment variable")
exit()
base_path = f"/cvmfs/software.eessi.io/versions/{eessi_version}/software/linux/{eessi_reference_architecture}"
base_path = f"/cvmfs/{repository}/versions/{eessi_version}/software/linux/{eessi_reference_architecture}"
cpu_easyconfig_files_dict = collect_eb_files(os.path.join(base_path, "software"))
# We also gather all the accelerator installations for NVIDIA-enabled packages
# We're not typically running this script on a node with a GPU so an override must have been set
Expand All @@ -243,7 +263,8 @@ def merge_dicts(d1, d2):

# Store the toolchain hierarchies supported by the EESSI version
eessi_software["eessi_version"][eessi_version]["toolchain_hierarchy"] = {}
for top_level_toolchain in EESSI_SUPPORTED_TOP_LEVEL_TOOLCHAINS[eessi_version]:
# RISC-V versions have a suffix like -001 at the end; drop it before the lookup
for top_level_toolchain in EESSI_SUPPORTED_TOP_LEVEL_TOOLCHAINS[eessi_version.split("-")[0]]:
# versions are typically 2024a/2024b etc. for top level toolchains
# so let's use that to make sorting easy
toolchain_family = f"{top_level_toolchain['version']}_{top_level_toolchain['name']}"
Expand Down
31 changes: 25 additions & 6 deletions scripts/process_eessi_software_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,10 @@
"x86_64/intel/cascadelake",
]

# RISC-V CPU targets; used in place of ARCHITECTURES when the scanned path contains "/riscv64/"
RISCV_ARCHITECTURES = [
"riscv64/generic",
]

NVIDIA_ARCHITECTURES = [
"accel/nvidia/cc70",
"accel/nvidia/cc80",
Expand Down Expand Up @@ -63,7 +67,11 @@ def get_software_information_by_filename(file_metadata, original_path=None, tool
# 1) Detect the architecture substring inside the path
base_version_dict["cpu_arch"] = []
detected_arch = None
for arch in ARCHITECTURES:
if '/riscv64/' in original_path:
architecture_group = RISCV_ARCHITECTURES
else:
architecture_group = ARCHITECTURES
for arch in architecture_group:
if f"/{arch}/" in original_path:
detected_arch = arch
break
Expand All @@ -89,11 +97,14 @@ def get_software_information_by_filename(file_metadata, original_path=None, tool
spider_cache = before_arch + detected_arch + "/.lmod/cache/spiderT.lua"

# 3) Substitute each architecture and test module file existence in spider cache
for arch in ARCHITECTURES:
for arch in architecture_group:
substituted_modulefile = modulefile.replace(detected_arch, arch)
substituted_spider_cache = spider_cache.replace(detected_arch, arch)
# os.path.exists is very expensive for CVMFS so we just look for the file in the spider cache
found = subprocess.run(["grep", "-q", substituted_modulefile, substituted_spider_cache]).returncode == 0
# os.path.exists is very expensive for CVMFS so we just look for the file in the spider cache (if we can)
if os.path.exists(substituted_spider_cache):
found = subprocess.run(["grep", "-q", substituted_modulefile, substituted_spider_cache]).returncode == 0
else:
found = os.path.exists(substituted_modulefile)
if found:
base_version_dict["cpu_arch"].append(arch)
# If we have an accelerator module let's check which architectures are supported
Expand All @@ -106,7 +117,15 @@ def get_software_information_by_filename(file_metadata, original_path=None, tool
# Let's not include the "accel/" part of the accel_arch
base_version_dict["gpu_arch"][arch].append(accel_arch.replace("accel/", "", 1))
else:
print(f"No module {accel_substituted_modulefile}...not adding software for architecture {arch}/{accel_arch}")
# Let's not be too noisy here, we know we don't have some CUDA archs in 2023.06
if not (
accel_substituted_modulefile.startswith('/cvmfs/software.eessi.io/versions/2023.06')
and accel_arch in ["accel/nvidia/cc100", "accel/nvidia/cc120"]
):
print(
f"No module {accel_substituted_modulefile}... "
f"not adding software for architecture {arch}/{accel_arch}"
)
continue
else:
print(f"No module {substituted_modulefile}...not adding software for architecture {arch}")
Expand Down Expand Up @@ -368,7 +387,7 @@ def main():
base_json_metadata["architectures_map"] = {}
for eessi_version in eessi_versions:
base_json_metadata["architectures_map"][eessi_version] = {}
for architecture in ARCHITECTURES:
for architecture in ARCHITECTURES + RISCV_ARCHITECTURES:
base_json_metadata["architectures_map"][eessi_version][architecture] = architecture
base_json_metadata["gpu_architectures_map"] = {}
base_json_metadata["category_details"] = {}
Expand Down
Loading