diff --git a/.github/workflows/monthly_collection.yaml b/.github/workflows/monthly_collection.yaml
new file mode 100644
index 0000000..e323cc8
--- /dev/null
+++ b/.github/workflows/monthly_collection.yaml
@@ -0,0 +1,42 @@
+name: Monthly Collection
+
+on:
+  workflow_dispatch:
+
+  schedule:
+    - cron: '1 0 1 * *' # Runs at 00:01 on the 1st day of every month
+
+jobs:
+  monthly_github_collection:
+    environment: monthly
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v6
+
+      - name: Install uv
+        uses: astral-sh/setup-uv@v8
+        with:
+          enable-cache: true
+          activate-environment: true
+
+      - name: Install dependencies
+        run: |
+          uv pip install .
+
+      - name: Collect GitHub Data
+        run: |
+          uv run gitmetrics collect -q \
+            --add-metrics \
+            --config-file monthly_extraction_config.yaml \
+            --token ${{ secrets.GITHUB_TOKEN }} \
+            --output-folder ${{ secrets.OUTPUT_FOLDER }}
+        env:
+          PYDRIVE_CREDENTIALS: ${{ secrets.PYDRIVE_CREDENTIALS }}
+
+      - name: Consolidate GitHub Data
+        run: |
+          uv run gitmetrics consolidate \
+            --config-file monthly_extraction_config.yaml \
+            --output-folder ${{ secrets.OUTPUT_FOLDER }}
+        env:
+          PYDRIVE_CREDENTIALS: ${{ secrets.PYDRIVE_CREDENTIALS }}
diff --git a/gitmetrics/drive.py b/gitmetrics/drive.py
index d7b201e..210070f 100644
--- a/gitmetrics/drive.py
+++ b/gitmetrics/drive.py
@@ -16,6 +16,7 @@ LOGGER = logging.getLogger(__name__)
 
 
 GDRIVE_LINK = 'gdrive://'
+MAX_UPLOAD_RETRIES = 10
 
 
 def is_drive_path(path):
@@ -91,9 +92,30 @@ def upload_spreadsheet(content, filename, folder):
 
     file_config = {'title': filename, 'parents': [{'id': folder}]}
     drive_file = drive.CreateFile(file_config)
+    content.seek(0)
     drive_file.content = content
-    drive_file.Upload({'convert': True})
-    LOGGER.info('Created file %s', drive_file.metadata['alternateLink'])
+
+    retries = 0
+    while retries != MAX_UPLOAD_RETRIES:
+        try:
+            if drive_file['mimeType'] == SPREADSHEET_MIMETYPE:
+                drive_file.Upload()
+            else:
+                drive_file.Upload({'convert': True})
+
+            LOGGER.info('Created file %s', drive_file.metadata['alternateLink'])
+            break
+        except Exception as e:
+            retries += 1
+            LOGGER.warning(
+                'Upload failed (%s/%s) for %s: %s',
+                retries,
+                MAX_UPLOAD_RETRIES,
+                drive_file.get('title', 'unknown'),
+                str(e),
+            )
+            if retries == MAX_UPLOAD_RETRIES:
+                raise
 
 
 def download_spreadsheet(folder, filename):
diff --git a/monthly_extraction_config.yaml b/monthly_extraction_config.yaml
new file mode 100644
index 0000000..31c3f75
--- /dev/null
+++ b/monthly_extraction_config.yaml
@@ -0,0 +1,11 @@
+# Projects passed via --config-file by the Monthly Collection workflow; each project key must appear only once.
+import_config: project_definitions.yaml
+projects:
+  huggingface:
+  grafana:
+  pandas:
+  airflow:
+  PyTorchLightning:
+  scikit-learn:
+  ray:
+  airbyte:
diff --git a/weekly_extraction_config.yaml b/weekly_extraction_config.yaml
index 76fcbe7..e3fb698 100644
--- a/weekly_extraction_config.yaml
+++ b/weekly_extraction_config.yaml
@@ -1,21 +1,13 @@
 import_config: project_definitions.yaml
 projects:
-  PyTorchLightning:
-  airbyte:
-  airflow:
   dagster:
   dbt:
   determined:
   earthly:
   feast:
   featuretools:
-  grafana:
-  huggingface:
   mariadb:
-  pandas:
   prefect:
   pycaret:
-  ray:
-  scikit-learn:
   snorkel:
   spacy: