Skip to content

Commit 414b617

Browse files
authored
Merge pull request #1613 from ksylvan/0714-fixes-for-custom-directory-unique-patterns-list-changelog-cache
Improve AI Summarization for Consistent Professional Changelog Entries
2 parents 47f7523 + f63e0df commit 414b617

File tree

9 files changed

+465
-253
lines changed

9 files changed

+465
-253
lines changed

.vscode/settings.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,7 @@
9999
"seaborn",
100100
"semgrep",
101101
"sess",
102+
"storer",
102103
"Streamlit",
103104
"stretchr",
104105
"talkpanel",

CHANGELOG.md

Lines changed: 205 additions & 228 deletions
Large diffs are not rendered by default.

cmd/generate_changelog/changelog.db

20 KB
Binary file not shown.

cmd/generate_changelog/internal/changelog/generator.go

Lines changed: 45 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -82,8 +82,8 @@ func (g *Generator) collectData() error {
8282
if cachedTag != "" {
8383
// Get the current latest tag from git
8484
currentTag, err := g.gitWalker.GetLatestTag()
85-
if err == nil && currentTag == cachedTag {
86-
// Same tag - load cached data and walk commits since tag for "Unreleased"
85+
if err == nil {
86+
// Load cached data - we can use it even if there are new tags
8787
cachedVersions, err := g.cache.GetVersions()
8888
if err == nil && len(cachedVersions) > 0 {
8989
g.versions = cachedVersions
@@ -97,7 +97,25 @@ func (g *Generator) collectData() error {
9797
}
9898
}
9999

100-
// Walk commits since the latest tag to get new unreleased commits
100+
// If we have new tags since cache, process the new versions only
101+
if currentTag != cachedTag {
102+
fmt.Fprintf(os.Stderr, "Processing new versions since %s...\n", cachedTag)
103+
newVersions, err := g.gitWalker.WalkHistorySinceTag(cachedTag)
104+
if err != nil {
105+
fmt.Fprintf(os.Stderr, "Warning: Failed to walk history since tag %s: %v\n", cachedTag, err)
106+
} else {
107+
// Merge new versions into cached versions (only add if not already cached)
108+
for name, version := range newVersions {
109+
if name != "Unreleased" { // Handle Unreleased separately
110+
if _, exists := g.versions[name]; !exists {
111+
g.versions[name] = version
112+
}
113+
}
114+
}
115+
}
116+
}
117+
118+
// Always update Unreleased section with latest commits
101119
unreleasedVersion, err := g.gitWalker.WalkCommitsSinceTag(currentTag)
102120
if err != nil {
103121
fmt.Fprintf(os.Stderr, "Warning: Failed to walk commits since tag %s: %v\n", currentTag, err)
@@ -110,6 +128,29 @@ func (g *Generator) collectData() error {
110128
g.versions["Unreleased"] = unreleasedVersion
111129
}
112130

131+
// Save any new versions to cache (after potential AI processing)
132+
if currentTag != cachedTag {
133+
for _, version := range g.versions {
134+
// Skip only Unreleased; every other version in g.versions (cached or new) is saved
135+
if version.Name != "Unreleased" {
136+
if err := g.cache.SaveVersion(version); err != nil {
137+
fmt.Fprintf(os.Stderr, "Warning: Failed to save version to cache: %v\n", err)
138+
}
139+
140+
for _, commit := range version.Commits {
141+
if err := g.cache.SaveCommit(commit, version.Name); err != nil {
142+
fmt.Fprintf(os.Stderr, "Warning: Failed to save commit to cache: %v\n", err)
143+
}
144+
}
145+
}
146+
}
147+
148+
// Update the last processed tag
149+
if err := g.cache.SetLastProcessedTag(currentTag); err != nil {
150+
fmt.Fprintf(os.Stderr, "Warning: Failed to update last processed tag: %v\n", err)
151+
}
152+
}
153+
113154
return nil
114155
}
115156
}
@@ -298,6 +339,7 @@ func (g *Generator) formatVersion(version *git.Version) string {
298339
}
299340
}
300341

342+
// For released versions, if we have cached AI summary, use it!
301343
if version.Name != "Unreleased" && version.AISummary != "" {
302344
fmt.Fprintf(os.Stderr, "✅ %s already summarized (skipping)\n", version.Name)
303345
sb.WriteString(version.AISummary)
@@ -529,8 +571,6 @@ func normalizeLineEndings(content string) string {
529571
}
530572

531573
func (g *Generator) formatCommitMessage(message string) string {
532-
prefixes := []string{"fix:", "feat:", "docs:", "style:", "refactor:",
533-
"test:", "chore:", "perf:", "ci:", "build:", "revert:", "# docs:"}
534574
strings_to_remove := []string{
535575
"### CHANGES\n", "## CHANGES\n", "# CHANGES\n",
536576
"...\n", "---\n", "## Changes\n", "## Change",
@@ -543,13 +583,6 @@ func (g *Generator) formatCommitMessage(message string) string {
543583
// No hard tabs
544584
message = strings.ReplaceAll(message, "\t", " ")
545585

546-
for _, prefix := range prefixes {
547-
if strings.HasPrefix(strings.ToLower(message), prefix) {
548-
message = strings.TrimSpace(message[len(prefix):])
549-
break
550-
}
551-
}
552-
553586
if len(message) > 0 {
554587
message = strings.ToUpper(message[:1]) + message[1:]
555588
}

cmd/generate_changelog/internal/changelog/summarize.go

Lines changed: 32 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,38 @@ import (
1010
const DefaultSummarizeModel = "claude-sonnet-4-20250514"
1111
const MinContentLength = 256 // Minimum content length to consider for summarization
1212

13+
// prompt is the instruction text passed as the final argument to the
// "fabric" command when summarizing a version's changelog content.
// It asks for at most five bullet points and for PR headers to be kept verbatim.
const prompt = `# ROLE
14+
You are an expert Technical Writer specializing in creating clear, concise,
15+
and professional release notes from raw Git commit logs.
16+
17+
# TASK
18+
Your goal is to transform a provided block of Git commit logs into a clean,
19+
human-readable changelog summary. You will identify the most important changes,
20+
format them as a bulleted list, and preserve the associated Pull Request (PR)
21+
information.
22+
23+
# INSTRUCTIONS:
24+
Follow these steps in order:
25+
1. Deeply analyze the input. You will be given a block of text containing PR
26+
information and commit log messages. Carefully read through the logs
27+
to identify individual commits and their descriptions.
28+
2. Identify Key Changes: Focus on commits that represent significant changes,
29+
such as new features ("feat"), bug fixes ("fix"), performance improvements ("perf"),
30+
or breaking changes ("BREAKING CHANGE").
31+
3. Select the Top 5: From the identified key changes, select a maximum of the five
32+
(5) most impactful ones to include in the summary.
33+
If there are five or fewer total changes, include all of them.
34+
4. Format the Output:
35+
- Where you see a PR header, include the PR header verbatim. NO CHANGES.
36+
**This is a critical rule: Do not modify the PR header, as it contains
37+
important links.** What follow the PR header are the related changes.
38+
- Do not add any additional text or preamble. Begin directly with the output.
39+
- Use bullet points for each key change. Starting each point with a hyphen ("-").
40+
- Ensure that the summary is concise and focused on the main changes.
41+
- The summary should be in American English (en-US), using proper grammar and punctuation.
42+
5. If the content is too brief or you do not see any PR headers, return the content as is.
43+
`
44+
1345
// getSummarizeModel returns the model to use for AI summarization
1446
func getSummarizeModel() string {
1547
if model := os.Getenv("FABRIC_CHANGELOG_SUMMARIZE_MODEL"); model != "" {
@@ -30,17 +62,6 @@ func SummarizeVersionContent(content string) (string, error) {
3062

3163
model := getSummarizeModel()
3264

33-
prompt := `Summarize the changes extracted from Git commit logs in a concise, professional way.
34-
Pay particular attention to the following rules:
35-
- Preserve the PR headers verbatim to your summary.
36-
- I REPEAT: Do not change the PR headers in any way. They contain links to the PRs and Author Profiles.
37-
- Use bullet points for lists and key changes (rendered using "-")
38-
- Focus on the main changes and improvements.
39-
- Avoid unnecessary details or preamble.
40-
- Keep it under 800 characters.
41-
- Be brief. List only the 5 most important changes along with the PR information which should be kept intact.
42-
- If the content is too brief or you do not see any PR headers, return the content as is.`
43-
4465
cmd := exec.Command("fabric", "-m", model, prompt)
4566
cmd.Stdin = strings.NewReader(content)
4667

cmd/generate_changelog/internal/git/walker.go

Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,10 +5,12 @@ import (
55
"regexp"
66
"strconv"
77
"strings"
8+
"time"
89

910
"github.com/go-git/go-git/v5"
1011
"github.com/go-git/go-git/v5/plumbing"
1112
"github.com/go-git/go-git/v5/plumbing/object"
13+
"github.com/go-git/go-git/v5/plumbing/storer"
1214
)
1315

1416
var (
@@ -280,6 +282,102 @@ func parseGitHubURL(url string) (owner, repo string) {
280282
return "", ""
281283
}
282284

285+
// WalkHistorySinceTag walks the commit log from HEAD (committer-time order) until it reaches
286+
// the commit of sinceTag, which is excluded, and groups the commits it sees by version.
//
// A commit whose message matches versionPattern opens a new Version entry (name, date and
// SHA taken from that commit) and is not itself added to any Commits list. Every other
// commit is appended to the most recently opened version; commits seen before any version
// commit are collected under "Unreleased".
//
// It returns the versions keyed by name. An error is returned when the tag, its commit,
// HEAD, or the log iterator cannot be obtained, or when iteration fails.
287+
func (w *Walker) WalkHistorySinceTag(sinceTag string) (map[string]*Version, error) {
288+
// Resolve sinceTag to the commit at which the walk must stop.
// NOTE(review): tagRef.Hash() is passed straight to CommitObject below; if sinceTag is an
// annotated tag the reference may name a tag object rather than a commit - confirm.
289+
tagRef, err := w.repo.Tag(sinceTag)
290+
if err != nil {
291+
return nil, fmt.Errorf("failed to get tag %s: %w", sinceTag, err)
292+
}
293+
294+
tagCommit, err := w.repo.CommitObject(tagRef.Hash())
295+
if err != nil {
296+
return nil, fmt.Errorf("failed to get commit for tag %s: %w", sinceTag, err)
297+
}
298+
299+
// HEAD is the starting point of the walk
300+
ref, err := w.repo.Head()
301+
if err != nil {
302+
return nil, fmt.Errorf("failed to get HEAD: %w", err)
303+
}
304+
305+
// Iterate the log starting at HEAD; the callback below ends the walk at the tag commit
306+
commitIter, err := w.repo.Log(&git.LogOptions{
307+
From: ref.Hash(),
308+
Order: git.LogOrderCommitterTime,
309+
})
310+
if err != nil {
311+
return nil, fmt.Errorf("failed to create commit iterator: %w", err)
312+
}
313+
defer commitIter.Close()
314+
315+
versions := make(map[string]*Version)
316+
currentVersion := "Unreleased"
317+
318+
err = commitIter.ForEach(func(c *object.Commit) error {
319+
// Reaching the tag's commit ends the walk; that commit itself is not recorded
320+
if c.Hash == tagCommit.Hash {
321+
return storer.ErrStop
322+
}
323+
324+
commit := &Commit{
325+
SHA: c.Hash.String(),
326+
Message: strings.TrimSpace(c.Message),
327+
Author: c.Author.Name,
328+
Email: c.Author.Email,
329+
Date: c.Author.When,
330+
IsMerge: len(c.ParentHashes) > 1,
331+
}
332+
333+
// A version commit switches the current version and creates its entry if missing;
// the version commit itself is not appended to Commits (note the early return)
334+
if matches := versionPattern.FindStringSubmatch(commit.Message); len(matches) > 1 {
335+
commit.IsVersion = true
336+
commit.Version = matches[1]
337+
currentVersion = commit.Version
338+
339+
if _, exists := versions[currentVersion]; !exists {
340+
versions[currentVersion] = &Version{
341+
Name: currentVersion,
342+
Date: commit.Date,
343+
CommitSHA: commit.SHA,
344+
Commits: []*Commit{},
345+
}
346+
}
347+
return nil
348+
}
349+
350+
// For merge commits, extract the PR number when the message matches prPattern
351+
if commit.IsMerge {
352+
if matches := prPattern.FindStringSubmatch(commit.Message); len(matches) > 1 {
353+
if prNumber, err := strconv.Atoi(matches[1]); err == nil {
354+
commit.PRNumber = prNumber
355+
}
356+
}
357+
}
358+
359+
// Ensure the current version has an entry (e.g. "Unreleased" before any version commit)
360+
if _, exists := versions[currentVersion]; !exists {
361+
versions[currentVersion] = &Version{
362+
Name: currentVersion,
363+
Date: time.Time{}, // Zero value, will be set by version commit
364+
CommitSHA: "",
365+
Commits: []*Commit{},
366+
}
367+
}
368+
369+
versions[currentVersion].Commits = append(versions[currentVersion].Commits, commit)
370+
return nil
371+
})
372+
373+
// storer.ErrStop is the expected way to end the walk, so it is not reported as an error
374+
if err == storer.ErrStop {
375+
err = nil
376+
}
377+
378+
return versions, err
379+
}
380+
283381
func dedupInts(ints []int) []int {
284382
seen := make(map[int]bool)
285383
result := []int{}

internal/core/plugin_registry.go

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -259,8 +259,24 @@ func (o *PluginRegistry) GetModels() (ret *ai.VendorsModels, err error) {
259259
func (o *PluginRegistry) Configure() (err error) {
260260
o.ConfigureVendors()
261261
_ = o.Defaults.Configure()
262+
if err := o.CustomPatterns.Configure(); err != nil {
263+
return fmt.Errorf("error configuring CustomPatterns: %w", err)
264+
}
262265
_ = o.PatternsLoader.Configure()
263266

267+
// Refresh the database custom patterns directory after custom patterns plugin is configured
268+
customPatternsDir := os.Getenv("CUSTOM_PATTERNS_DIRECTORY")
269+
if customPatternsDir != "" {
270+
// Expand home directory if needed
271+
if strings.HasPrefix(customPatternsDir, "~/") {
272+
if homeDir, err := os.UserHomeDir(); err == nil {
273+
customPatternsDir = filepath.Join(homeDir, customPatternsDir[2:])
274+
}
275+
}
276+
o.Db.Patterns.CustomPatternsDir = customPatternsDir
277+
o.PatternsLoader.Patterns.CustomPatternsDir = customPatternsDir
278+
}
279+
264280
//YouTube and Jina are not mandatory, so ignore not configured error
265281
_ = o.YouTube.Configure()
266282
_ = o.Jina.Configure()

internal/plugins/db/fsdb/sessions.go

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -86,9 +86,10 @@ func (o *Session) String() (ret string) {
8686
ret += fmt.Sprintf("\n--- \n[%v]\n%v", message.Role, message.Content)
8787
if message.MultiContent != nil {
8888
for _, part := range message.MultiContent {
89-
if part.Type == chat.ChatMessagePartTypeImageURL {
89+
switch part.Type {
90+
case chat.ChatMessagePartTypeImageURL:
9091
ret += fmt.Sprintf("\n%v: %v", part.Type, *part.ImageURL)
91-
} else if part.Type == chat.ChatMessagePartTypeText {
92+
case chat.ChatMessagePartTypeText:
9293
ret += fmt.Sprintf("\n%v: %v", part.Type, part.Text)
9394
}
9495
}

0 commit comments

Comments
 (0)