Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
70 changes: 70 additions & 0 deletions packages/backend/src/gitlab.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -68,3 +68,73 @@ test('shouldExcludeProject returns false when exclude.userOwnedProjects is true
exclude: { userOwnedProjects: true },
})).toBe(false);
});

// A project whose reported storage size (99) is below `exclude.size.min` (100) must be excluded.
test('shouldExcludeProject returns true when project size is less than exclude.size.min.', () => {
    const exclude = { size: { min: 100 } };
    const project = {
        path_with_namespace: 'test/project',
        statistics: { storage_size: 99 },
    } as unknown as ProjectSchema;

    const isExcluded = shouldExcludeProject({ project, exclude });

    expect(isExcluded).toBe(true);
});

// A project whose reported storage size (101) is above `exclude.size.max` (100) must be excluded.
test('shouldExcludeProject returns true when project size is greater than exclude.size.max.', () => {
    const exclude = { size: { max: 100 } };
    const project = {
        path_with_namespace: 'test/project',
        statistics: { storage_size: 101 },
    } as unknown as ProjectSchema;

    const isExcluded = shouldExcludeProject({ project, exclude });

    expect(isExcluded).toBe(true);
});

// Both bounds are inclusive: a size exactly equal to min and max (100) must NOT be excluded.
test('shouldExcludeProject returns false when project size is within exclude.size bounds.', () => {
    const exclude = { size: { min: 100, max: 100 } };
    const project = {
        path_with_namespace: 'test/project',
        statistics: { storage_size: 100 },
    } as unknown as ProjectSchema;

    const isExcluded = shouldExcludeProject({ project, exclude });

    expect(isExcluded).toBe(false);
});

// Without a `statistics` object the size is unknown, so the size filter must not exclude the project.
test('shouldExcludeProject returns false when exclude.size is set but project statistics are unavailable.', () => {
    const exclude = { size: { min: 100 } };
    const project = { path_with_namespace: 'test/project' } as ProjectSchema;

    const isExcluded = shouldExcludeProject({ project, exclude });

    expect(isExcluded).toBe(false);
});
81 changes: 76 additions & 5 deletions packages/backend/src/gitlab.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,17 @@ import { fetchWithRetry, measure } from "./utils.js";
const logger = createLogger('gitlab');
export const GITLAB_CLOUD_HOSTNAME = "gitlab.com";

/**
 * GitLab role access levels, mapping each role name to its numeric level.
 * The numeric values are the same set accepted by the `minAccessLevel`
 * connection config option (5 | 10 | 20 | 30 | 40 | 50).
 */
export enum AccessLevel {
MINIMAL_ACCESS = 5,
GUEST = 10,
REPORTER = 20,
DEVELOPER = 30,
MAINTAINER = 40,
OWNER = 50,
}

// Type of the `minAccessLevel` value forwarded in the project-listing options.
type ProjectsAccessLevel = AccessLevel;

export const createGitLabFromPersonalAccessToken = async ({ token, url }: { token?: string, url?: string }) => {
const isGitLabCloud = url ? new URL(url).hostname === GITLAB_CLOUD_HOSTNAME : true;
return new Gitlab({
Expand Down Expand Up @@ -48,6 +59,16 @@ export const getGitLabReposFromConfig = async (config: GitlabConnectionConfig) =
token,
url: config.url,
});
const minAccessLevel: ProjectsAccessLevel | undefined = config.minAccessLevel;
const projectListOptions = {
perPage: 100,
...(minAccessLevel !== undefined ? {
minAccessLevel,
} : {}),
...(config.exclude?.size ? {
statistics: true,
} : {}),
};
Comment on lines +62 to +71
Copy link
Contributor

@coderabbitai coderabbitai bot Feb 14, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor

🧩 Analysis chain

🏁 Script executed:

#!/bin/bash
# First, let's locate the file and understand the context around line 64-73 and line 188
head -100 packages/backend/src/gitlab.ts | tail -50

Repository: sourcebot-dev/sourcebot

Length of output: 1916


🏁 Script executed:

#!/bin/bash
# Check for Projects.show usage patterns in the file
rg -n "Projects\.show" packages/backend/src/gitlab.ts -B 2 -A 2

Repository: sourcebot-dev/sourcebot

Length of output: 412


🏁 Script executed:

#!/bin/bash
# Search for getProjectSizeBytes to understand how statistics is used
rg -n "getProjectSizeBytes\|statistics" packages/backend/src/gitlab.ts -B 2 -A 2

Repository: sourcebot-dev/sourcebot

Length of output: 49


🏁 Script executed:

#!/bin/bash
# Check type definitions or imports for Projects API
rg -n "import.*Projects\|from.*gitbeaker" packages/backend/src/gitlab.ts

Repository: sourcebot-dev/sourcebot

Length of output: 49


🏁 Script executed:

#!/bin/bash
# Look for the entire gitlab.ts file to understand the flow
wc -l packages/backend/src/gitlab.ts

Repository: sourcebot-dev/sourcebot

Length of output: 101


🏁 Script executed:

#!/bin/bash
# Get the entire gitlab.ts file to see context
cat -n packages/backend/src/gitlab.ts

Repository: sourcebot-dev/sourcebot

Length of output: 17926


🏁 Script executed:

#!/bin/bash
# Search for getProjectSizeBytes in the entire repo
rg -n "getProjectSizeBytes" --type=ts -B 2 -A 5

Repository: sourcebot-dev/sourcebot

Length of output: 1428


🏁 Script executed:

#!/bin/bash
# Search for where size filtering/exclusion happens
rg -n "exclude.*size\|size.*exclude" --type=ts -B 3 -A 3

Repository: sourcebot-dev/sourcebot

Length of output: 49


🏁 Script executed:

#!/bin/bash
# Search for Projects.show signature or usage patterns more broadly
rg -n "Projects\.show\(" --type=ts -B 2 -A 2

Repository: sourcebot-dev/sourcebot

Length of output: 567


🏁 Script executed:

#!/bin/bash
# Check if there are any type definitions or documentation for Projects.show
fd -t f "package.json" | head -5

Repository: sourcebot-dev/sourcebot

Length of output: 196


🏁 Script executed:

#!/bin/bash
# Check node_modules or look for `@gitbeaker` documentation
find . -name "*.d.ts" -type f 2>/dev/null | grep -i gitbeaker | head -10

Repository: sourcebot-dev/sourcebot

Length of output: 49


🏁 Script executed:

#!/bin/bash
# Search for any examples of Projects.show with options in the codebase
rg -n "Projects\.show.*\{" --type=ts

Repository: sourcebot-dev/sourcebot

Length of output: 49


🌐 Web query:

@gitbeaker/rest Projects.show statistics parameter API documentation

💡 Result:

In @gitbeaker/rest, Projects.show maps to GitLab’s “Retrieve a project” endpoint (GET /projects/:id). GitLab supports a query parameter:

  • statistics (boolean): “Include project statistics” (adds a statistics object to the returned project, e.g., commit_count, storage_size, etc.; requires at least Reporter/Developer/Maintainer/Owner). [1]

So in GitBeaker you pass it via the optional options object (GitBeaker’s general rule: required args first, then an options object for optional API params) [2]:

const project = await api.Projects.show(projectId, { statistics: true });
// -> calls GET /projects/:id?statistics=true

Note: this is different from GitLab’s separate Project statistics API endpoint GET /projects/:id/statistics (clone/pull fetch stats for the last 30 days). [3]

Sources: GitLab Projects API (“Retrieve a project” params) [1]; GitBeaker usage conventions (optional params in an options object) [2]; GitLab Project statistics API (/projects/:id/statistics) [3].


Pass statistics option to Projects.show() when size filtering is enabled.

projectListOptions includes statistics: true only when config.exclude?.size is set, and this is correctly spread into Projects.all(), Groups.allProjects(), and Users.allProjects() (lines 83, 107, 148). However, the Projects.show() call on line 188 doesn't pass this option, leaving project.statistics undefined for individually listed projects. As a result, getProjectSizeBytes() returns undefined and size-based exclusion silently skips those projects.

Since @gitbeaker/rest's Projects.show() API supports the statistics parameter as a second argument, pass it when size filtering is enabled:

-const fetchFn = () => api.Projects.show(project);
+const fetchFn = () => api.Projects.show(project, { statistics: config.exclude?.size ? true : undefined });
🤖 Prompt for AI Agents
In `@packages/backend/src/gitlab.ts` around lines 64 - 73, projectListOptions only
sets statistics: true when config.exclude?.size is present, but the
Projects.show() call (used to fetch individually listed projects) doesn't pass
that option, so project.statistics stays undefined and getProjectSizeBytes()
can't filter by size; update the call to Projects.show(...) to pass the same
options object (or at least { statistics: true } when config.exclude?.size) as
the second argument so that project.statistics is populated for size
filtering—use the existing projectListOptions or a conditional { statistics:
true } when calling Projects.show in the code path that loads individual
projects.

✅ Addressed in commit 525570a


let allRepos: ProjectSchema[] = [];
let allWarnings: string[] = [];
Expand All @@ -58,7 +79,7 @@ export const getGitLabReposFromConfig = async (config: GitlabConnectionConfig) =
logger.debug(`Fetching all projects visible in ${config.url}...`);
const { durationMs, data: _projects } = await measure(async () => {
const fetchFn = () => api.Projects.all({
perPage: 100,
...projectListOptions,
});
return fetchWithRetry(fetchFn, `all projects in ${config.url}`, logger);
});
Expand All @@ -82,8 +103,8 @@ export const getGitLabReposFromConfig = async (config: GitlabConnectionConfig) =
logger.debug(`Fetching project info for group ${group}...`);
const { durationMs, data } = await measure(async () => {
const fetchFn = () => api.Groups.allProjects(group, {
perPage: 100,
includeSubgroups: true
...projectListOptions,
includeSubgroups: true,
});
return fetchWithRetry(fetchFn, `group ${group}`, logger);
});
Expand Down Expand Up @@ -123,7 +144,7 @@ export const getGitLabReposFromConfig = async (config: GitlabConnectionConfig) =
logger.debug(`Fetching project info for user ${user}...`);
const { durationMs, data } = await measure(async () => {
const fetchFn = () => api.Users.allProjects(user, {
perPage: 100,
...projectListOptions,
});
return fetchWithRetry(fetchFn, `user ${user}`, logger);
});
Expand Down Expand Up @@ -162,7 +183,9 @@ export const getGitLabReposFromConfig = async (config: GitlabConnectionConfig) =
try {
logger.debug(`Fetching project info for project ${project}...`);
const { durationMs, data } = await measure(async () => {
const fetchFn = () => api.Projects.show(project);
const fetchFn = () => api.Projects.show(project, {
statistics: config.exclude?.size ? true : undefined,
});
return fetchWithRetry(fetchFn, `project ${project}`, logger);
});
logger.debug(`Found project ${project} in ${durationMs}ms.`);
Expand Down Expand Up @@ -253,6 +276,21 @@ export const shouldExcludeProject = ({
}
}

if (exclude?.size) {
const projectSizeBytes = getProjectSizeBytes(project);
if (projectSizeBytes !== undefined) {
if (exclude.size.min !== undefined && projectSizeBytes < exclude.size.min) {
reason = `project size (${projectSizeBytes}) is less than \`exclude.size.min\` (${exclude.size.min})`;
return true;
}

if (exclude.size.max !== undefined && projectSizeBytes > exclude.size.max) {
reason = `project size (${projectSizeBytes}) is greater than \`exclude.size.max\` (${exclude.size.max})`;
return true;
}
}
}

if (include?.topics) {
const configTopics = include.topics.map(topic => topic.toLowerCase());
const projectTopics = project.topics ?? [];
Expand Down Expand Up @@ -284,6 +322,39 @@ export const shouldExcludeProject = ({
return false;
}

/**
 * Extracts a project's size (in bytes) from its `statistics` object.
 *
 * GitLab's API only includes size data under `statistics` when the request is made with
 * `statistics=true`, so the object may be absent.
 *
 * @param project - The project as returned by the GitLab client.
 * @returns The first numeric size field found, or `undefined` when the project has no
 * `statistics` object or none of the known size fields holds a number.
 */
const getProjectSizeBytes = (project: ProjectSchema): number | undefined => {
    type SizeField = "storage_size" | "repository_size" | "storageSize" | "repositorySize";

    // Priority order: snake_case keys first, then their camelCase equivalents, so the
    // lookup is resilient to differences in how the response is typed/normalised.
    const sizeFields: readonly SizeField[] = [
        "storage_size",
        "repository_size",
        "storageSize",
        "repositorySize",
    ];

    const { statistics } = project as ProjectSchema & {
        statistics?: Partial<Record<SizeField, number>>;
    };
    if (!statistics) {
        return undefined;
    }

    for (const field of sizeFields) {
        const candidate = statistics[field];
        // A strict `typeof` check keeps a legitimate size of 0 and skips non-numeric values.
        if (typeof candidate === "number") {
            return candidate;
        }
    }

    return undefined;
}

export const getProjectMembers = async (projectId: string, api: InstanceType<typeof Gitlab>) => {
try {
const fetchFn = () => api.ProjectMembers.all(projectId, {
Expand Down
27 changes: 27 additions & 0 deletions packages/schemas/src/v3/connection.schema.ts
Original file line number Diff line number Diff line change
Expand Up @@ -290,6 +290,18 @@ const schema = {
],
"description": "List of groups to sync with. All projects in the group (and recursive subgroups) visible to the provided `token` (if any) will be synced, unless explicitly defined in the `exclude` property. Subgroups can be specified by providing the path to the subgroup (e.g. `my-group/sub-group-a`)."
},
"minAccessLevel": {
"type": "integer",
"enum": [
5,
10,
20,
30,
40,
50
],
"description": "Minimum GitLab access level required for projects to be returned. Uses GitLab role levels where 20=Reporter, 30=Developer, 40=Maintainer, and 50=Owner."
},
"projects": {
"type": "array",
"items": {
Expand Down Expand Up @@ -362,6 +374,21 @@ const schema = {
"ci"
]
]
},
"size": {
"type": "object",
"description": "Exclude projects based on GitLab statistics size fields (in bytes).",
"properties": {
"min": {
"type": "integer",
"description": "Minimum project size (in bytes) to sync (inclusive). Projects smaller than this will be excluded."
},
"max": {
"type": "integer",
"description": "Maximum project size (in bytes) to sync (inclusive). Projects larger than this will be excluded."
}
},
"additionalProperties": false
}
},
"additionalProperties": false
Expand Down
17 changes: 17 additions & 0 deletions packages/schemas/src/v3/connection.type.ts
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,10 @@ export interface GitlabConnectionConfig {
* List of groups to sync with. All projects in the group (and recursive subgroups) visible to the provided `token` (if any) will be synced, unless explicitly defined in the `exclude` property. Subgroups can be specified by providing the path to the subgroup (e.g. `my-group/sub-group-a`).
*/
groups?: string[];
/**
* Minimum GitLab access level required for projects to be returned. Uses GitLab role levels where 20=Reporter, 30=Developer, 40=Maintainer, and 50=Owner.
*/
minAccessLevel?: 5 | 10 | 20 | 30 | 40 | 50;
/**
* List of individual projects to sync with. The project's namespace must be specified. See: https://docs.gitlab.com/ee/user/namespace/
*/
Expand Down Expand Up @@ -166,6 +170,19 @@ export interface GitlabConnectionConfig {
* List of project topics to exclude when syncing. Projects that match one of the provided `topics` will be excluded from syncing. Glob patterns are supported.
*/
topics?: string[];
/**
* Exclude projects based on GitLab statistics size fields (in bytes).
*/
size?: {
/**
* Minimum project size (in bytes) to sync (inclusive). Projects smaller than this will be excluded.
*/
min?: number;
/**
* Maximum project size (in bytes) to sync (inclusive). Projects larger than this will be excluded.
*/
max?: number;
};
};
revisions?: GitRevisions;
}
Expand Down
27 changes: 27 additions & 0 deletions packages/schemas/src/v3/gitlab.schema.ts
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,18 @@ const schema = {
],
"description": "List of groups to sync with. All projects in the group (and recursive subgroups) visible to the provided `token` (if any) will be synced, unless explicitly defined in the `exclude` property. Subgroups can be specified by providing the path to the subgroup (e.g. `my-group/sub-group-a`)."
},
"minAccessLevel": {
"type": "integer",
"enum": [
5,
10,
20,
30,
40,
50
],
"description": "Minimum GitLab access level required for projects to be returned. Uses GitLab role levels where 20=Reporter, 30=Developer, 40=Maintainer, and 50=Owner."
},
"projects": {
"type": "array",
"items": {
Expand Down Expand Up @@ -150,6 +162,21 @@ const schema = {
"ci"
]
]
},
"size": {
"type": "object",
"description": "Exclude projects based on GitLab statistics size fields (in bytes).",
"properties": {
"min": {
"type": "integer",
"description": "Minimum project size (in bytes) to sync (inclusive). Projects smaller than this will be excluded."
},
"max": {
"type": "integer",
"description": "Maximum project size (in bytes) to sync (inclusive). Projects larger than this will be excluded."
}
},
"additionalProperties": false
}
},
"additionalProperties": false
Expand Down
17 changes: 17 additions & 0 deletions packages/schemas/src/v3/gitlab.type.ts
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,10 @@ export interface GitlabConnectionConfig {
* List of groups to sync with. All projects in the group (and recursive subgroups) visible to the provided `token` (if any) will be synced, unless explicitly defined in the `exclude` property. Subgroups can be specified by providing the path to the subgroup (e.g. `my-group/sub-group-a`).
*/
groups?: string[];
/**
* Minimum GitLab access level required for projects to be returned. Uses GitLab role levels where 20=Reporter, 30=Developer, 40=Maintainer, and 50=Owner.
*/
minAccessLevel?: 5 | 10 | 20 | 30 | 40 | 50;
/**
* List of individual projects to sync with. The project's namespace must be specified. See: https://docs.gitlab.com/ee/user/namespace/
*/
Expand Down Expand Up @@ -68,6 +72,19 @@ export interface GitlabConnectionConfig {
* List of project topics to exclude when syncing. Projects that match one of the provided `topics` will be excluded from syncing. Glob patterns are supported.
*/
topics?: string[];
/**
* Exclude projects based on GitLab statistics size fields (in bytes).
*/
size?: {
/**
* Minimum project size (in bytes) to sync (inclusive). Projects smaller than this will be excluded.
*/
min?: number;
/**
* Maximum project size (in bytes) to sync (inclusive). Projects larger than this will be excluded.
*/
max?: number;
};
};
revisions?: GitRevisions;
}
Expand Down
27 changes: 27 additions & 0 deletions packages/schemas/src/v3/index.schema.ts
Original file line number Diff line number Diff line change
Expand Up @@ -705,6 +705,18 @@ const schema = {
],
"description": "List of groups to sync with. All projects in the group (and recursive subgroups) visible to the provided `token` (if any) will be synced, unless explicitly defined in the `exclude` property. Subgroups can be specified by providing the path to the subgroup (e.g. `my-group/sub-group-a`)."
},
"minAccessLevel": {
"type": "integer",
"enum": [
5,
10,
20,
30,
40,
50
],
"description": "Minimum GitLab access level required for projects to be returned. Uses GitLab role levels where 20=Reporter, 30=Developer, 40=Maintainer, and 50=Owner."
},
"projects": {
"type": "array",
"items": {
Expand Down Expand Up @@ -777,6 +789,21 @@ const schema = {
"ci"
]
]
},
"size": {
"type": "object",
"description": "Exclude projects based on GitLab statistics size fields (in bytes).",
"properties": {
"min": {
"type": "integer",
"description": "Minimum project size (in bytes) to sync (inclusive). Projects smaller than this will be excluded."
},
"max": {
"type": "integer",
"description": "Maximum project size (in bytes) to sync (inclusive). Projects larger than this will be excluded."
}
},
"additionalProperties": false
}
},
"additionalProperties": false
Expand Down
Loading