Skip to content

Commit 6076025

Browse files
authored
Merge pull request #32 from daeisbae/31-working-repository-analysis-demo
Repository add and analysis working demo
2 parents 377c511 + 98d861c commit 6076025

File tree

25 files changed

+372
-202
lines changed

25 files changed

+372
-202
lines changed

.env.example

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
# Postgres Database Configuration
2+
DB_HOST=
3+
DB_PORT=
4+
DB_NAME=
5+
DB_USER=
6+
DB_PASSWORD=
7+
8+
# GitHub token, used to increase the rate limit when reading the repository
9+
GITHUB_TOKEN=
10+
11+
# LLM_PROVIDER=deepseek | google (for Google AI Studio)
12+
LLM_PROVIDER=
13+
LLM_APIKEY=
14+
LLM_MODELNAME=

components.json

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
{
2+
"$schema": "https://ui.shadcn.com/schema.json",
3+
"style": "new-york",
4+
"rsc": true,
5+
"tsx": false,
6+
"tailwind": {
7+
"config": "tailwind.config.mjs",
8+
"css": "src/app/globals.css",
9+
"baseColor": "neutral",
10+
"cssVariables": false,
11+
"prefix": ""
12+
},
13+
"aliases": {
14+
"components": "@/components",
15+
"utils": "@/lib/utils",
16+
"ui": "@/components/ui",
17+
"lib": "@/lib",
18+
"hooks": "@/hooks"
19+
},
20+
"iconLibrary": "lucide"
21+
}

package.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
"lucide-react": "^0.469.0",
2424
"next": "15.1.0",
2525
"next-mdx-remote": "^5.0.0",
26+
"openai": "^4.77.0",
2627
"pg": "^8.13.1",
2728
"react": "^19.0.0",
2829
"react-dom": "^19.0.0",

src/agent/document-splitter/code-splitter.ts

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,10 @@ enum Language {
77
RUBY = "ruby",
88
RUST = "rust",
99
PHP = "php",
10-
CPP = "cpp"
10+
CPP = "cpp",
11+
JAVA = "java",
12+
SCALA = "scala",
13+
MARKDOWN = "markdown"
1114
}
1215

1316
/**
@@ -46,6 +49,9 @@ export default class CodeSplitter {
4649
'hpp': Language.CPP,
4750
'hxx': Language.CPP,
4851
'h': Language.CPP,
52+
'java': Language.JAVA,
53+
'scala': Language.SCALA,
54+
'md': Language.MARKDOWN
4955
};
5056
return extensionToLanguageMap[extension.toLowerCase()] || null;
5157
}
@@ -54,7 +60,7 @@ export default class CodeSplitter {
5460
* Splits the provided code into chunks based on the file extension.
5561
* @param {string} fileExtension - The file extension indicating the programming language.
5662
* @param {string} code - The code content to be split.
57-
* @returns {Promise<string | null>} - A promise that resolves to an array of document chunks.
63+
* @returns {Promise<string | null>} - A promise that resolves to the code with line numbers, or null.
5864
*/
5965
async splitCode(fileExtension: string, code: string): Promise<string | null> {
6066
const language : SupportedTextSplitterLanguage | null = this.getLanguageFromExtension(fileExtension);

src/agent/structured-output/index.ts

Lines changed: 39 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ import {
77
RepoInfo,
88
} from '@/agent/structured-output/prompt-generator'
99
import { CodePrompt, FolderPrompt } from '@/agent/structured-output/prompt'
10+
import CodeSplitter from '@/agent/document-splitter/code-splitter'
1011

1112
/**
1213
* Abstract base class for processing prompts using LLM
@@ -26,16 +27,22 @@ abstract class BaseProcessor {
2627

2728
/**
2829
* Process the given prompt using LLM and parse the response into JSON format
29-
* @param {any} prompt - The prompt to process
30-
* @param {RepoInfo | null} repoInfo - Information about the repository
30+
* @param {string} prompt - The prompt to process
3131
* @returns Parsed object from the LLM response
3232
*/
33-
protected async process(prompt: any, repoInfo: RepoInfo | null): Promise<object> {
33+
protected async process(prompt: string): Promise<any> {
3434
const response = await this.llm.run(prompt, [])
3535
return await this.schemaParser.parse(response)
3636
}
3737
}
3838

39+
interface CodeSummaryOutput {
40+
name: string
41+
path: string
42+
summary: string
43+
usage: string
44+
}
45+
3946
/**
4047
* Processes code-related prompts with specific schema and formatting
4148
* @example
@@ -45,12 +52,15 @@ abstract class BaseProcessor {
4552
* const result = await codeProcessor.process(sourceCode)
4653
*/
4754
export class CodeProcessor extends BaseProcessor {
55+
private codeSplitter: CodeSplitter
56+
4857
/**
4958
* Creates a new CodeProcessor instance with file schema type
5059
* @param llm - The LLM provider instance
5160
*/
5261
constructor(llm: LLMProvider) {
5362
super(llm)
63+
this.codeSplitter = new CodeSplitter(200, 25)
5464
this.schemaParser = new SchemaParser(getSchema(SchemaType.FILE))
5565
this.promptGenerator = new PromptGenerator(
5666
{
@@ -67,23 +77,43 @@ export class CodeProcessor extends BaseProcessor {
6777

6878
/**
6979
* Process the given code string
70-
* @param {string }code - Source code to process
80+
* The file path is read from `repoInfo.path`; its extension is passed to the code splitter.
81+
* @param {string} code - Source code to process
7182
* @param {RepoInfo} repoInfo - Information about the repository
7283
* @returns Parsed summary object for the file schema, or null when the extension derived from `repoInfo.path` is empty or the code splitter returns nothing
7384
*/
74-
async process(code: string, repoInfo: RepoInfo): Promise<object> {
85+
async generate(code: string, repoInfo: RepoInfo): Promise<CodeSummaryOutput | null> {
86+
const extension = repoInfo.path.split('.').pop()
87+
if (!extension) {
88+
console.warn('No extension found in the file path')
89+
return null
90+
}
91+
const splittedDoc = await this.codeSplitter.splitCode(extension, code)
92+
if (!splittedDoc) {
93+
console.warn('Code splitting failed')
94+
return null
95+
}
96+
7597
const prompt = await this.promptGenerator.generate(
7698
{
7799
requirements: CodePrompt,
78100
formatInstructions: this.schemaParser.formalInstructions,
79101
...repoInfo,
80102
},
81-
code
103+
splittedDoc
82104
)
83-
return await super.process(prompt, null)
105+
106+
return await super.process(prompt)
84107
}
85108
}
86109

110+
interface FolderSummaryOutput {
111+
name: string
112+
usage: string
113+
path: string
114+
summary: string
115+
}
116+
87117
/**
88118
* Processes folder-related prompts with specific schema and formatting
89119
* @example
@@ -119,7 +149,7 @@ export class FolderProcessor extends BaseProcessor {
119149
* @param {RepoInfo} repoInfo - Information about the repository
120150
* @returns Processed and parsed JSON object defined in schema factory
121151
*/
122-
async process(folder: string[], repoInfo: RepoInfo): Promise<object> {
152+
async generate(folder: string[], repoInfo: RepoInfo): Promise<FolderSummaryOutput | null> {
123153
const prompt = await this.promptGenerator.generate(
124154
{
125155
requirements: FolderPrompt,
@@ -129,6 +159,6 @@ export class FolderProcessor extends BaseProcessor {
129159
undefined,
130160
folder
131161
)
132-
return await super.process(prompt, null)
162+
return await super.process(prompt)
133163
}
134164
}

src/agent/structured-output/prompt.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ You will receive the following information, extracted from a GitHub repository:
2121
* Explain its role in the overall system.
2222
* Identify its dependencies on other modules/components.
2323
* Highlight any important classes, functions, or data structures.
24-
* If you need to refer to a specific code, mention the path to that file by using the following markdown link format: [\`Description of Code Block\`](Full github url of the file including the start line with optional ending line#L{startLine}-L{endLine}). This is in the form of "https://github.com/{owner}/{repo}/blob/{commitSha}/{path}#L{lineStart}-L{lineEnd}".
24+
* Link all the code blocks (Class,Function,Enum,Exception) that are referenced using the following markdown link format: [\`Description of Code Block\`](Full github url of the file including the start line with optional ending line#L{startLine}-L{endLine}). This is in the form of "https://github.com/{owner}/{repo}/blob/{commitSha}/{path}#L{lineStart}-L{lineEnd}".
2525
2. **Code-Level Insights:**
2626
* Analyze the code files to understand the implementation details.
2727
* Identify core algorithms, data structures, and design patterns used.
@@ -56,7 +56,7 @@ You will receive the following information, summarized from the expert software
5656
* Explain its role in the overall system.
5757
* Identify its dependencies on other modules/components/folder.
5858
* Highlight any important classes, functions, or data structures in it's sub-files and sub-folders.
59-
* If you need to refer to a specific code, mention the path to that file by using the following markdown link format: [\`Description of Code Block\`]({Full github url of the file including the start line with optional ending line#L{startLine}-L{endLine}). This is in the form of "https://github.com/{owner}/{repo}/blob/{commitSha}/{path}#L{lineStart}-L{lineEnd}".
59+
* Link all the code blocks that are referenced using the following markdown link format: [\`Description of Code Block\`](Full github url of the file including the start line with optional ending line#L{startLine}-L{endLine}). This is in the form of "https://github.com/{owner}/{repo}/blob/{commitSha}/{path}#L{lineStart}-L{lineEnd}".
6060
2. **Dependencies and Relationships:**
6161
* Clearly document the relationships between different folders and files.
6262
* Explain how different parts of the codebase interact with each other.

src/agent/structured-output/schema-factory.ts

Lines changed: 6 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,11 @@ File Schema Example Output:
4646
path: z
4747
.string()
4848
.describe('Path to the file within the repository.'),
49+
usage: z
50+
.string()
51+
.describe(
52+
'What the file is used for. Describe less than 10 words (ex. Data Parsing, API Requests, etc.)'
53+
),
4954
summary: z
5055
.string()
5156
.describe(
@@ -82,30 +87,13 @@ Folder Schema Example Output:
8287
case SchemaType.FOLDER:
8388
return z.object({
8489
name: z.string().describe('Name of the folder.'),
85-
usage: z.string().describe('What the folder is used for.'),
90+
usage: z.string().describe('What the folder is used for. Describe less than 10 words (ex. Server Lifecycle Management, API Utility Functions, etc.)'),
8691
path: z.string().describe('Path to the folder.'),
8792
summary: z
8893
.string()
8994
.describe(
9095
'Summary of the folder, its main purpose, and its role in the project. Include Markdown links to important code blocks within the file using the format `[{Description of Code Block}]({Full github url of the file including the start line with optional ending line}#L{startLine}-L{endLine})` where applicable.'
91-
),
92-
files: z
93-
.array(
94-
z.object({
95-
fileName: z
96-
.string()
97-
.describe(
98-
'Name of the file mentioned in the summary'
99-
),
100-
filePath: z
101-
.string()
102-
.describe(
103-
'Path to the file mentioned in the summary'
104-
),
105-
})
10696
)
107-
.optional()
108-
.describe('List of files in the folder.'),
10997
})
11098

11199
default:
Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
import { FullFolder, FullRepository } from '@/service/get-db'
2+
3+
export function formatToMarkdown(fullRepo: FullRepository): string {
4+
const repo = fullRepo.repository
5+
const branches = fullRepo.branch
6+
const folders = fullRepo.folders
7+
8+
const url = `https://github.com/${repo.owner}/${repo.repo}/blob/${branches?.last_commit_sha}`
9+
10+
let markdown = recursiveFolderToMarkdown(folders[0], url, '#')
11+
12+
return markdown
13+
}
14+
15+
function recursiveFolderToMarkdown(
16+
folder: FullFolder,
17+
url: string,
18+
subfolderHeading: string
19+
): string {
20+
let markdown = `${subfolderHeading} ${folder.usage}\n `
21+
markdown += `---\n`
22+
markdown += folder.path
23+
? `- Reference: [\`${folder.path}\`](${url}/${folder.path}) \n`
24+
: ''
25+
markdown += `\n${folder.ai_summary}\n`
26+
folder.files.forEach((file) => {
27+
markdown += `###### ${file.usage}\n`
28+
markdown += `---\n`
29+
markdown += file.name
30+
? `- Reference: [\`${file.name}\`](${url}/${file.name}) \n`
31+
: ''
32+
markdown += `\n${file.ai_summary}\n`
33+
})
34+
folder.subfolders.forEach((subfolder) => {
35+
markdown += recursiveFolderToMarkdown(
36+
subfolder,
37+
url,
38+
subfolderHeading.length >= 5
39+
? subfolderHeading
40+
: subfolderHeading + '#'
41+
)
42+
})
43+
return markdown
44+
}
Lines changed: 22 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -1,60 +1,42 @@
11
import { JSX, Suspense } from 'react'
22
import { RepoCard } from '@/components/RepoCard'
3-
import Loading from '@/app/[ownerSlug]/[repoSlug]/loading'
4-
import { FetchRepoService, FullRepository } from '@/service/get-db'
3+
import { FetchRepoService } from '@/service/get-db'
54
import { MarkdownContent } from '@/components/MarkdownContent'
65
import { notFound } from 'next/navigation'
6+
import { formatToMarkdown } from '@/app/[ownerSlug]/[repoSlug]/formatter'
7+
import Loading from '@/app/[ownerSlug]/[repoSlug]/loading'
78

9+
// Page props type for Next.js 15+, where `params` is passed as a Promise and must be awaited
810
interface PageProps {
9-
params: {
10-
ownerSlug: string
11-
repoSlug: string
12-
}
11+
params: Promise<{ ownerSlug: string; repoSlug: string }>
1312
}
1413

15-
interface RepoPageProps {
16-
ownerSlug: string
17-
repoSlug: string
18-
}
19-
20-
const placeholder = `
21-
# parser.cpp\n\n
22-
23-
- Reference \`parser/parser.cpp\`\n
24-
25-
This file defines the \`Parser\` class, responsible for transforming a stream of tokens into an Abstract Syntax Tree (AST). The parser utilizes a queue of \`TokenPtr\` objects (\`tok_queue_\`) and provides methods for consuming tokens (\`Eat\`), peeking at the next token (\`Peek\`), and expecting specific token types (\`ExpectedTokenType\`). The core functionality resides in \`ProduceAST\` which drives the parsing process by repeatedly calling \`ParseStatement\` until an end-of-line token is encountered. Different parsing methods are present to handle various expressions such as \`ParsePrimaryExpression\`, \`ParseAdditionExpression\`, \`ParseMultiplicationExpression\`, and \`ParseComparisonExpression\`. It supports variable declarations and assignments, and also handles whitespace using \`ParseWhitespaceExpression\`. The parser uses recursive descent parsing strategy with helper functions for each type of expression. It throws \`UnexpectedTokenParsedException\` when unexpected tokens are encountered. The \`Parser\` class depends on the \`TokenPtr\` and the \`ast.hpp\` module for the AST node definitions. 
Key methods include: [\`Eat\`](https://github.com/daeisbae/AParser/blob/9bbea84efa9f8eeed5576f53e4a65ca87a7f023c/parser/parser.cpp#L11-L15) for consuming tokens, [\`Peek\`](https://github.com/daeisbae/AParser/blob/9bbea84efa9f8eeed5576f53e4a65ca87a7f023c/parser/parser.cpp#L17-L17) for peeking at tokens, [\`ExpectedTokenType\`](https://github.com/daeisbae/AParser/blob/9bbea84efa9f8eeed5576f53e4a65ca87a7f023c/parser/parser.cpp#L19-L39) for validating token types, [\`ProduceAST\`](https://github.com/daeisbae/AParser/blob/9bbea84efa9f8eeed5576f53e4a65ca87a7f023c/parser/parser.cpp#L41-L53) for generating the AST, [\`ParseStatement\`](https://github.com/daeisbae/AParser/blob/9bbea84efa9f8eeed5576f53e4a65ca87a7f023c/parser/parser.cpp#L55-L62) for parsing statements, and [\`ParseExpression\`](https://github.com/daeisbae/AParser/blob/9bbea84efa9f8eeed5576f53e4a65ca87a7f023c/parser/parser.cpp#L64-L66) for parsing expressions. The file also handles different types of expressions using separate parsing methods like [\`ParsePrimaryExpression\`](https://github.com/daeisbae/AParser/blob/9bbea84efa9f8eeed5576f53e4a65ca87a7f023c/parser/parser.cpp#L68-L145), [\`ParseAdditionExpression\`](https://github.com/daeisbae/AParser/blob/9bbea84efa9f8eeed5576f53e4a65ca87a7f023c/parser/parser.cpp#L147-L165), [\`ParseMultiplicationExpression\`](https://github.com/daeisbae/AParser/blob/9bbea84efa9f8eeed5576f53e4a65ca87a7f023c/parser/parser.cpp#L167-L185), [\`ParseWhitespaceExpression\`](https://github.com/daeisbae/AParser/blob/9bbea84efa9f8eeed5576f53e4a65ca87a7f023c/parser/parser.cpp#L187-L192), [\`ParseIdentifierDeclarationExpression\`](https://github.com/daeisbae/AParser/blob/9bbea84efa9f8eeed5576f53e4a65ca87a7f023c/parser/parser.cpp#L194-L217), [\`ParseIdentifierAssignmentExpression\`](https://github.com/daeisbae/AParser/blob/9bbea84efa9f8eeed5576f53e4a65ca87a7f023c/parser/parser.cpp#L219-L243), and 
[\`ParseComparisonExpression\`](https://github.com/daeisbae/AParser/blob/9bbea84efa9f8eeed5576f53e4a65ca87a7f023c/parser/parser.cpp#L245-L263).
26-
`
14+
// Main page component
15+
export default async function Page({
16+
params,
17+
}: PageProps): Promise<JSX.Element> {
18+
const { ownerSlug, repoSlug } = await params
2719

28-
async function RepoPage({
29-
ownerSlug,
30-
repoSlug,
31-
}: RepoPageProps): Promise<JSX.Element> {
3220
const fetchRepoService = new FetchRepoService()
33-
const repoDetails: FullRepository | null =
34-
await fetchRepoService.getFullRepositoryTree(ownerSlug, repoSlug)
21+
const repoDetails = await fetchRepoService.getFullRepositoryTree(
22+
ownerSlug,
23+
repoSlug
24+
)
3525

3626
if (!repoDetails) {
3727
notFound()
3828
}
3929

4030
return (
4131
<div className="flex gap-6 p-6">
42-
<div className="flex-1">
43-
<MarkdownContent content={placeholder} />
44-
</div>
45-
<div className="w-[300px]">
46-
<RepoCard repoInfo={repoDetails.repository} />
47-
</div>
32+
<Suspense fallback={<Loading />}>
33+
<div className="flex-1">
34+
<MarkdownContent content={formatToMarkdown(repoDetails)} />
35+
</div>
36+
<div className="w-[300px]">
37+
<RepoCard repoInfo={repoDetails.repository} />
38+
</div>
39+
</Suspense>
4840
</div>
4941
)
5042
}
51-
52-
export default function DocumentationPage({ params }: PageProps) {
53-
const { ownerSlug, repoSlug } = params
54-
55-
return (
56-
<Suspense fallback={<Loading />}>
57-
<RepoPage ownerSlug={ownerSlug} repoSlug={repoSlug} />
58-
</Suspense>
59-
)
60-
}

src/app/add-repositories/insert-repo.tsx

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,12 @@
11
'use server'
22

3+
import LLMConfig from '@/llm/llm-config'
4+
import LLMFactory from '@/app/add-repositories/llm-factory'
35
import { InsertRepoService } from '@/service/insert-db'
46

57
export async function insertRepository(owner: string, repo: string) {
6-
const insertRepoService = new InsertRepoService()
8+
const llmConfig = new LLMConfig(1, 0.95, 40, 8192)
9+
const insertRepoService = new InsertRepoService(LLMFactory.createProvider(llmConfig))
710
const result = await insertRepoService.insertRepository(owner, repo)
811
if (!result) {
912
return { success: false, error: 'Repository already exists' }

0 commit comments

Comments
 (0)