diff --git a/src/github/filterfile.ts b/src/github/filterfile.ts index 97b3722..d9b0f28 100644 --- a/src/github/filterfile.ts +++ b/src/github/filterfile.ts @@ -16,6 +16,20 @@ export function whitelistedFile( ) } +/** + * Filter out the files that match any of the regex patterns + * @param {Array} files - The files to filter (Provided by fetchGithubRepoTree) + * @param {Array} regexFilter - The regex patterns of the files to exclude (each entry is lower-cased before it is tested, so patterns must be lower-case) + * @returns {Array} - The files that match none of the patterns + */ +export function blacklistedFiles(files: string[], regexFilter: string[]): string[] { + const filterPatterns = regexFilter.map((pattern) => new RegExp(pattern)) + return files.filter((file) => + !filterPatterns.some((pattern) => pattern.test(file.toLowerCase())) + ) +} + + /** * Filter the folders based on the regex patterns * @param {Array} folders - The folders to filter (Provided by fetchGithubRepoTree) @@ -38,7 +52,7 @@ export const whitelistedFilter = [ '\\.ts$', '\\.java$', '\\.scala', - 'README.md', + '\\.md$', '\\.cpp$', '\\.cc$', '\\.cxx$', @@ -51,6 +65,23 @@ export const whitelistedFilter = [ '\\.php$', ] +/** Lower-case regex patterns passed to blacklistedFiles; each one matches an exact file name, either bare or as the last segment of a '/'-separated path */ +export const blacklistedFile = [ + '(^|/)__init__\\.py$', + '(^|/)setup\\.py$', + '(^|/)next-env\\.d\\.ts$', + '(^|/)license\\.md$', + '(^|/)contributor\\.md$', + '(^|/)contributing\\.md$', + '(^|/)contrib\\.md$', + '(^|/)code_of_conduct\\.md$', + '(^|/)security\\.md$', + '(^|/)development\\.md$', + '(^|/)funding\\.md$', + '(^|/)pull_request_template\\.md$', + '(^|/)issue_template\\.md$' +] + export const blacklistedFilter = [ 'node_modules', '.github', diff --git a/src/service/insert-db.ts b/src/service/insert-db.ts index 9976875..8fd970d 100644 --- a/src/service/insert-db.ts +++ b/src/service/insert-db.ts @@ -3,7 +3,7 @@ import { Branch, BranchData } from '@/db/models/branch' import { Folder, FolderData } from '@/db/models/folder' import { File, FileData } from '@/db/models/file' import { fetchGithubRepoFile, fetchGithubRepoDetails, fetchGithubRepoTree, RepoTreeResult } from '@/github/fetchrepo' -import { whitelistedFilter, whitelistedFile, blacklistedFilter, blacklistedFolder 
} from '@/github/filterfile' +import { whitelistedFilter, whitelistedFile, blacklistedFilter, blacklistedFolder, blacklistedFiles, blacklistedFile } from '@/github/filterfile' import { FolderProcessor, CodeProcessor } from '@/agent/structured-output/index' import { LLMProvider } from '@/llm/llm-provider' @@ -74,7 +74,8 @@ export class InsertRepoService { branchId: number, parentFolderId: number | null ): Promise { - const allowedFiles = whitelistedFile(tree.files, whitelistedFilter); + let allowedFiles = whitelistedFile(tree.files, whitelistedFilter); + allowedFiles = blacklistedFiles(allowedFiles, blacklistedFile); const allowedFolders = blacklistedFolder(tree.subdirectories, blacklistedFilter);