From e92ee6d4350135ac274778c2ba138a794b0e5fa8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Dae=E2=9D=A4=EF=B8=8F?= <74119677+daeisbae@users.noreply.github.com>
Date: Sun, 15 Dec 2024 00:03:45 -0800
Subject: [PATCH] Implement github repo crawling with file filter (#3)

---
 package.json                       |   1 +
 src/github/fetchrepo.js            | 100 ++++++++++++++++
 src/github/filterfile.js           |  16 +++
 src/test/github/fetchrepo.test.js  | 177 +++++++++++++++++++++++++++++
 src/test/github/filterfile.test.js |  59 ++++++++++
 5 files changed, 353 insertions(+)

diff --git a/package.json b/package.json
index b5f0960..01a05ce 100644
--- a/package.json
+++ b/package.json
@@ -11,6 +11,7 @@
     "lint": "next lint"
   },
   "dependencies": {
+    "axios": "^1.7.9",
     "next": "15.1.0",
     "react": "^19.0.0",
     "react-dom": "^19.0.0"
diff --git a/src/github/fetchrepo.js b/src/github/fetchrepo.js
index e69de29..1d3872a 100644
--- a/src/github/fetchrepo.js
+++ b/src/github/fetchrepo.js
@@ -0,0 +1,100 @@
+import axios from "axios";
+
+/**
+ * Percent-encodes each segment of a repository path, keeping the "/" separators
+ * @param {string} path - Slash-separated path within the repository
+ * @returns {string} The path with every segment URL-encoded
+ */
+function encodeRepoPath(path) {
+  return path
+    .split("/")
+    .map((segment) => encodeURIComponent(segment))
+    .join("/");
+}
+
+/**
+ * Fetches details about a GitHub repository
+ * @param {string} owner - The repository owner
+ * @param {string} repo - The repository name
+ * @returns {Promise<{repoOwner: string, repoName: string, url: string, topics: string[], language: string, description: string, stars: number, forks: number, defaultBranch: string}>}
+ * @throws Any error raised while fetching or reading the response is logged and rethrown
+ */
+export async function fetchGithubRepoDetails(owner, repo) {
+  const repoUrl = `https://api.github.com/repos/${owner}/${repo}`;
+
+  try {
+    const { data } = await axios.get(repoUrl);
+    return {
+      repoOwner: data.owner.login,
+      repoName: data.name,
+      url: data.html_url,
+      topics: data.topics,
+      language: data.language,
+      description: data.description,
+      stars: data.stargazers_count,
+      forks: data.forks_count,
+      defaultBranch: data.default_branch,
+    };
+  } catch (error) {
+    console.error("Error fetching repository details:", error);
+    throw error;
+  }
+}
+
+/**
+ * Fetches contents of a GitHub repository, recursing into every subdirectory
+ * @param {string} owner - The repository owner
+ * @param {string} repo - The repository name
+ * @param {string} [sha] - The commit sha of the repository; the ref query is left out when it is not given
+ * @param {string} [path=''] - Optional path within repository
+ * @returns {Promise<{path: string, files: string[], subdirectories: Array}>} Paths of the files directly under path, plus one nested result per subdirectory
+ * @throws Any error raised while fetching or reading the response is logged and rethrown
+ */
+export async function fetchGithubRepoTree(owner, repo, sha, path = '') {
+  // Only send ?ref= when a sha was supplied; interpolating undefined would request the literal ref "undefined"
+  const refQuery = sha == null ? '' : `?ref=${encodeURIComponent(sha)}`;
+  const contentsUrl = `https://api.github.com/repos/${owner}/${repo}/contents/${encodeRepoPath(path)}${refQuery}`;
+
+  try {
+    const { data } = await axios.get(contentsUrl);
+    const result = {
+      path: path,
+      files: [],
+      subdirectories: []
+    };
+
+    for (const item of data) {
+      if (item.type === 'file') {
+        result.files.push(item.path);
+      } else if (item.type === 'dir') {
+        const subDir = await fetchGithubRepoTree(owner, repo, sha, item.path);
+        result.subdirectories.push(subDir);
+      }
+    }
+
+    return result;
+  } catch (error) {
+    console.error("Error fetching repository contents:", error);
+    throw error;
+  }
+}
+
+/**
+ * Fetches a file from a GitHub repository
+ * @param {string} owner - The repository owner
+ * @param {string} repo - The repository name
+ * @param {string} sha - The commit sha of the file to fetch
+ * @param {string} path - The path of the file to fetch
+ * @returns {Promise} The axios response of the raw file request
+ * @throws Any error raised by the request is logged and rethrown
+ */
+export async function fetchGithubRepoFile(owner, repo, sha, path) {
+  const codeUrl = `https://raw.githubusercontent.com/${owner}/${repo}/${sha}/${encodeRepoPath(path)}`;
+
+  try {
+    return await axios.get(codeUrl);
+  } catch (error) {
+    console.error("Error fetching repository file:", error);
+    throw error;
+  }
+}
diff --git a/src/github/filterfile.js b/src/github/filterfile.js
index e69de29..0014b3f 100644
--- a/src/github/filterfile.js
+++ b/src/github/filterfile.js
@@ -0,0 +1,16 @@
+/**
+ * Filter the files based on the regex patterns
+ * @param {Array} files - The files to filter (Provided by fetchGithubRepoTree)
+ * @param {Array} regexFilter - The regex patterns (pattern strings or RegExp objects) to filter the files
+ * @returns {Array} - The filtered files (files that do NOT match the patterns)
+ */
+export function filterFile(files, regexFilter) {
+  const filterPatterns = regexFilter.map((pattern) => new RegExp(pattern));
+  return files.filter((file) =>
+    !filterPatterns.some((pattern) => {
+      // A RegExp carrying the g or y flag resumes from lastIndex, so rewind before each file
+      pattern.lastIndex = 0;
+      return pattern.test(file);
+    }) // Negate the match
+  );
+}
diff --git a/src/test/github/fetchrepo.test.js b/src/test/github/fetchrepo.test.js
index e69de29..7f6489e 100644
--- a/src/test/github/fetchrepo.test.js
+++ b/src/test/github/fetchrepo.test.js
@@ -0,0 +1,177 @@
+import axios from "axios";
+import { jest } from "@jest/globals";
+import { fetchGithubRepoDetails, fetchGithubRepoTree, fetchGithubRepoFile } from "@/github/fetchrepo.js";
+
+jest.mock("axios");
+
+describe("fetchGithubRepoDetails", () => {
+  it("should fetch and return the repository details correctly", async () => {
+    const mockResponse = {
+      data: {
+        owner: {
+          login: "octocat",
+        },
+        name: "Hello-World",
+        html_url: "https://github.com/octocat/Hello-World",
+        description: "My first repository on GitHub!",
+        stargazers_count: 2769,
+        forks_count: 2499,
+        topics: [],
+        language: "JavaScript",
+        default_branch: "master",
+      },
+    };
+
+    axios.get = jest.fn(() => mockResponse);
+
+    const result = await fetchGithubRepoDetails("octocat", "Hello-World");
+
+    const expectedOutput = {
+      repoOwner: "octocat",
+      repoName: "Hello-World",
+      url: "https://github.com/octocat/Hello-World",
+      topics: [],
+      language: "JavaScript",
+      description: "My first repository on GitHub!",
+      stars: 2769,
+      forks: 2499,
+      defaultBranch: "master",
+    };
+
+    expect(result).toEqual(expectedOutput);
+  });
+});
+
+describe("fetchGithubRepoTree", () => {
+  it("should fetch single full file tree structure", async () => {
+    const mockResponse = {
+      data: [
+        {
+          "name": "README",
+          "path": "README",
+          "sha": "980a0d5f19a64b4b30a87d4206aade58726b60e3",
+          "size": 13,
+          "url": "https://api.github.com/repos/octocat/Hello-World/contents/README?ref=master",
+          "html_url": "https://github.com/octocat/Hello-World/blob/master/README",
+          "git_url": "https://api.github.com/repos/octocat/Hello-World/git/blobs/980a0d5f19a64b4b30a87d4206aade58726b60e3",
+          "download_url": "https://raw.githubusercontent.com/octocat/Hello-World/master/README",
+          "type": "file",
+          "_links": {
+            "self": "https://api.github.com/repos/octocat/Hello-World/contents/README?ref=master",
+            "git": "https://api.github.com/repos/octocat/Hello-World/git/blobs/980a0d5f19a64b4b30a87d4206aade58726b60e3",
+            "html": "https://github.com/octocat/Hello-World/blob/master/README"
+          }
+        }
+      ]
+    };
+
+    const expectedOutput = {
+      path: "",
+      files: ["README"],
+      subdirectories: []
+    };
+
+    axios.get = jest.fn(() => mockResponse);
+
+    const result = await fetchGithubRepoTree("octocat", "Hello-World", "7fd1a60b01f91b314f59955a4e4d4e80d8edf11d", "");
+
+    expect(result).toEqual(expectedOutput);
+  });
+
+
+  it("should fetch repository tree structure recursively", async () => {
+    // Mock root directory response
+    const mockRootResponse = {
+      data: [
+        {
+          name: ".github",
+          path: ".github",
+          type: "dir"
+        },
+        {
+          name: "README.md",
+          path: "README.md",
+          type: "file"
+        },
+        {
+          name: "main.cpp",
+          path: "main.cpp",
+          type: "file"
+        }
+      ]
+    };
+
+    // Mock .github directory response
+    const mockGithubDirResponse = {
+      data: [
+        {
+          name: "workflows",
+          path: ".github/workflows",
+          type: "dir"
+        }
+      ]
+    };
+
+    // Mock workflows directory response
+    const mockWorkflowsResponse = {
+      data: [
+        {
+          name: "ci.yml",
+          path: ".github/workflows/ci.yml",
+          type: "file"
+        }
+      ]
+    };
+
+    // Fresh mock: do not rely on the jest.fn() an earlier test left on axios.get
+    axios.get = jest.fn()
+      .mockResolvedValueOnce(mockRootResponse)
+      .mockResolvedValueOnce(mockGithubDirResponse)
+      .mockResolvedValueOnce(mockWorkflowsResponse);
+
+    const result = await fetchGithubRepoTree("daeisbae", "AParser", "main");
+
+    const expectedOutput = {
+      path: "",
+      files: ["README.md", "main.cpp"],
+      subdirectories: [
+        {
+          path: ".github",
+          files: [],
+          subdirectories: [
+            {
+              path: ".github/workflows",
+              files: [".github/workflows/ci.yml"],
+              subdirectories: []
+            }
+          ]
+        }
+      ]
+    };
+
+    expect(result).toEqual(expectedOutput);
+    expect(axios.get).toHaveBeenCalledTimes(3);
+  });
+});
+
+describe("fetchGithubRepoFile", () => {
+  it("should fetch file with specific sha and empty path", async () => {
+    const mockResponse = {
+      data: "Hello World!"
+    };
+
+    axios.get = jest.fn().mockResolvedValue(mockResponse);
+
+    const result = await fetchGithubRepoFile(
+      "octocat",
+      "Hello-World",
+      "7fd1a60b01f91b314f59955a4e4d4e80d8edf11d",
+      ""
+    );
+
+    expect(result).toEqual(mockResponse);
+    expect(axios.get).toHaveBeenCalledWith(
+      "https://raw.githubusercontent.com/octocat/Hello-World/7fd1a60b01f91b314f59955a4e4d4e80d8edf11d/"
+    );
+  });
+});
diff --git a/src/test/github/filterfile.test.js b/src/test/github/filterfile.test.js
index e69de29..9ee133a 100644
--- a/src/test/github/filterfile.test.js
+++ b/src/test/github/filterfile.test.js
@@ -0,0 +1,59 @@
+import { filterFile } from '@/github/filterfile';
+
+describe('filterFile', () => {
+  it('should remove files that match a simple pattern', () => {
+    const files = ['index.js', 'README.md', 'node_modules/library.js'];
+    const regexFilter = ['node_modules'];
+    const result = filterFile(files, regexFilter);
+    expect(result).toEqual(['index.js', 'README.md']);
+  });
+
+  it('should remove files that match a regex for specific extensions', () => {
+    const files = ['index.js', 'app.json', 'style.css'];
+    const regexFilter = ['.*\\.json$'];
+    const result = filterFile(files, regexFilter);
+    expect(result).toEqual(['index.js', 'style.css']);
+  });
+
+  it('should remove files that match multiple regex patterns', () => {
+    const files = ['index.js', 'README.md', '.github/config.yml', 'node_modules/library.js'];
+    const regexFilter = ['node_modules', '\\.github'];
+    const result = filterFile(files, regexFilter);
+    expect(result).toEqual(['index.js', 'README.md']);
+  });
+
+  it('should retain all files if regex patterns do not match any file', () => {
+    const files = ['index.js', 'README.md', 'style.css'];
+    const regexFilter = ['non_existent_folder', '\\.unknown$'];
+    const result = filterFile(files, regexFilter);
+    expect(result).toEqual(['index.js', 'README.md', 'style.css']);
+  });
+
+  it('should remove all files if the regex matches everything', () => {
+    const files = ['index.js', 'README.md', 'style.css'];
+    const regexFilter = ['.*'];
+    const result = filterFile(files, regexFilter);
+    expect(result).toEqual([]);
+  });
+
+  it('should handle an empty file list gracefully', () => {
+    const files = [];
+    const regexFilter = ['.*'];
+    const result = filterFile(files, regexFilter);
+    expect(result).toEqual([]);
+  });
+
+  it('should handle an empty regex filter gracefully', () => {
+    const files = ['index.js', 'README.md'];
+    const regexFilter = [];
+    const result = filterFile(files, regexFilter);
+    expect(result).toEqual(['index.js', 'README.md']);
+  });
+
+  it('should apply RegExp objects with the global flag to every file', () => {
+    const files = ['a.json', 'b.json', 'c.json'];
+    const regexFilter = [/\.json$/g];
+    const result = filterFile(files, regexFilter);
+    expect(result).toEqual([]);
+  });
+});