geojs/scripts/update_changelog.py
David Manthey 432db0a2e4 docs: Update CHANGELOG
Add a script to make this easier to update (eventually add it to
pre-commit hooks)
2025-11-13 09:36:58 -05:00

389 lines
11 KiB
Python
Executable File

#!/usr/bin/env python3
# Vibe-coded change log updater.
import os
import re
import subprocess
from collections import defaultdict
from typing import Dict, List, Optional, Set, Tuple
# Path of the change log file, relative to the current working directory.
CHANGELOG_PATH = 'CHANGELOG.md'

# Kinds we treat specially when they appear as the PR's 'most significant'
# type: a PR whose most significant commit is one of these is left out of
# the change log.
NON_USER_KINDS = {
    'build', 'doc', 'docs', 'chore', 'ci', 'style', 'refactor', 'test',
}

# Priority of commit kinds when deciding a PR's 'most significant' component.
# Higher number means more significant.
KIND_PRIORITY = {
    'feat': 50,
    'fix': 40,
    'bug': 40,
    'perf': 30,
    'other': 20,
    # Every non-user kind shares the same low priority.
    **dict.fromkeys(sorted(NON_USER_KINDS), 10),
}

# Section heading used in the change log for each reported category.
CATEGORY_HEADINGS = dict(
    feat='Features',
    bug='Bug Fixes',
    perf='Performance Improvements',
    other='Other Changes',
)
def run_git(*args: str) -> str:
    """Invoke ``git`` with ``args`` and return what it wrote to stdout.

    The output is decoded as UTF-8.  A non-zero exit status surfaces as
    ``subprocess.CalledProcessError``.
    """
    command = ['git']
    command.extend(args)
    return subprocess.check_output(command, encoding='utf-8')
def parse_semver(v: str) -> Optional[Tuple[int, int, int]]:
    """Turn an ``X.Y.Z`` string into ``(X, Y, Z)``; return None if malformed.

    Surrounding whitespace is ignored.  Anything other than exactly three
    dot-separated runs of digits (e.g. a ``v`` prefix or ``-rc1`` suffix) is
    rejected.
    """
    found = re.fullmatch(r'(\d+)\.(\d+)\.(\d+)', v.strip())
    if found is None:
        return None
    major, minor, patch = found.groups()
    return int(major), int(minor), int(patch)
def find_latest_version_in_changelog(path: str) -> Optional[str]:
    """Return the version of the topmost '## Version X.Y.Z' heading in ``path``.

    The topmost heading is taken to be the most recent release.  Returns None
    when the file does not exist or contains no such heading.
    """
    if not os.path.exists(path):
        return None
    with open(path, 'r', encoding='utf-8') as changelog:
        text = changelog.read()
    # MULTILINE lets '^' anchor at every line start; search() stops at the
    # first heading, which is the one nearest the top of the file.
    heading = re.search(r'^##\s+Version\s+(\d+\.\d+\.\d+)', text, re.MULTILINE)
    if heading is None:
        return None
    return heading.group(1)
def get_semver_tags() -> List[Tuple[str, str]]:
    """List the repository's semantic-version tags, oldest version first.

    Each item is ``(version, tag_name)`` where ``version`` is the tag name
    with one leading ``v`` removed (if present).  Tags that do not parse as
    ``X.Y.Z`` are ignored.
    """
    found: List[Tuple[str, str]] = []
    for tag_name in (row.strip() for row in run_git('tag').splitlines()):
        if not tag_name:
            continue
        version = tag_name[1:] if tag_name.startswith('v') else tag_name
        if parse_semver(version):
            found.append((version, tag_name))
    # Compare numerically so that e.g. 1.10.0 sorts after 1.9.0.
    return sorted(found, key=lambda pair: parse_semver(pair[0]))
def classify_prefix(subject: str) -> Optional[str]:
    """
    Extract a conventional-commit style type prefix.

    Handles:
    - 'feat: summary'
    - 'feat(scope): summary'
    - 'docs(api): update docs'
    - 'feat!: summary' and 'fix(scope)!: summary' (breaking-change marker)
    - 'feat(scope) summary' (scope but no colon)

    Matching is case-insensitive; the returned type is always lower case.

    Returns the type (e.g. 'feat', 'docs') or None if not found.
    """
    s = subject.strip().lower()
    # type(scope): summary OR type: summary, each optionally with the
    # breaking-change '!' immediately before the colon.
    m = re.match(r'^([a-z]+)(\([^)]*\))?!?:', s)
    if m:
        return m.group(1)
    # Less common: 'feat(scope) summary' (no colon)
    m = re.match(r'^([a-z]+)\([^)]*\)!?\s', s)
    if m:
        return m.group(1)
    return None
def commit_kind(subject: str) -> str:
    """
    Map a commit subject to its kind.

    The result is one of: feat, fix, bug, perf, build, doc, docs, chore, ci,
    style, refactor, test, or other.  Subjects with no type prefix, or with a
    prefix outside that list, are reported as 'other'.
    """
    recognised = (
        'feat', 'fix', 'bug', 'perf',
        'build', 'doc', 'docs', 'chore', 'ci', 'style', 'refactor', 'test',
    )
    prefix = classify_prefix(subject)
    return prefix if prefix in recognised else 'other'
def strip_known_prefix(subject: str) -> str:
    """Strip a leading conventional-commit prefix from a title if present.

    Recognises 'type: ', 'type(scope): ' and the breaking-change forms
    'type!: ' / 'type(scope)!: ', case-insensitively.  The remainder keeps its
    original casing.  If there is no prefix, or nothing follows the prefix,
    the (whitespace-trimmed) title is returned unchanged.
    """
    s = subject.strip()
    # Match against the original string and take the captured remainder
    # directly.  Searching for the remainder's text instead would find the
    # wrong offset whenever that text also occurs inside the prefix
    # (e.g. 'fix: fix').
    m = re.match(r'[A-Za-z]+(?:\([^)]*\))?!?:\s*(.*)$', s)
    if not m:
        return s
    remainder = m.group(1).lstrip()
    # Never reduce a title to an empty string.
    return remainder or s
def extract_pr_from_merge(subject: str, body: str) -> Optional[Tuple[int, str]]:
    """
    Recognise a 'Merge pull request #N' commit and return (N, title).

    The marker is looked for in the subject first, then in the body; None is
    returned when neither contains it.  The title is the first non-empty
    line of the body (where GitHub-style merges put the PR title), with any
    'Merge pull request #N from <ref>' text removed.  When that leaves
    nothing, the subject is used instead.
    """
    marker = r'Merge pull request #(\d+)'
    hit = re.search(marker, subject) or re.search(marker, body)
    if hit is None:
        return None

    stripped_lines = (row.strip() for row in body.splitlines())
    first_line = next((row for row in stripped_lines if row), '')

    candidate = first_line or subject
    candidate = re.sub(r'Merge pull request #\d+ from \S+', '', candidate).strip()
    return int(hit.group(1)), candidate or subject.strip()
def extract_pr_from_squash(subject: str) -> Optional[Tuple[int, str]]:
    """
    For squash/rebase merges, detect 'Some title (#123)' and return (123, title).

    The PR number is taken from the '(#N)' at the END of the subject, since
    that is where the squash-merge reference is appended; the title is the
    subject with that suffix removed.  This matters when the title itself
    quotes another PR, e.g. 'Revert "Fix thing (#12)" (#34)' is PR 34.

    If no '(#N)' ends the subject, the first '(#N)' anywhere in it is used and
    the title is the whole (trimmed) subject.  Returns None when the subject
    contains no '(#N)' at all.
    """
    trailing = re.search(r'\s*\(#(\d+)\)\s*$', subject)
    m = trailing or re.search(r'\(#(\d+)\)', subject)
    if not m:
        return None
    pr_num = int(m.group(1))
    if trailing:
        title = subject[:trailing.start()].strip()
    else:
        title = subject.strip()
    # A subject that is nothing but '(#N)' keeps itself as the title.
    return pr_num, title or subject.strip()
def get_commits_for_merge(merge_sha: str) -> List[str]:
    """
    List the non-merge commits a merge brought in (first-parent..merge).

    The range is taken relative to the merge's first parent and is listed
    with ``--no-merges``, so neither the merge commit itself nor any merge
    commit inside the range is returned.  A commit with no parents yields
    just ``[merge_sha]``.
    """
    parent_shas = run_git('show', '-s', '--format=%P', merge_sha).split()
    if not parent_shas:
        return [merge_sha]
    first_parent = parent_shas[0]
    listing = run_git('rev-list', '--no-merges', f'{first_parent}..{merge_sha}')
    return [sha for sha in map(str.strip, listing.splitlines()) if sha]
def categorize_pr(pr_num: int, commit_shas: Set[str],
                  subject_cache: Dict[str, str]) -> Optional[str]:
    """
    Pick the change log category for a PR from its component commits.

    Every commit is classified by its subject line and the kind with the
    highest KIND_PRIORITY decides the outcome.  Subjects are looked up through
    ``subject_cache`` (sha -> subject); missing entries are fetched from git
    and stored there.

    Returns:
    'feat', 'bug', 'perf', 'other', or None if the PR should be skipped.

    A PR is skipped when it has no commits or when its most significant kind
    is in NON_USER_KINDS.
    """
    top_kind: Optional[str] = None
    top_score = -1
    for sha in commit_shas:
        try:
            subject = subject_cache[sha]
        except KeyError:
            subject = run_git('show', '-s', '--format=%s', sha).strip()
            subject_cache[sha] = subject
        kind = commit_kind(subject)
        score = KIND_PRIORITY.get(kind, 0)
        if score > top_score:
            top_kind, top_score = kind, score

    if top_kind is None or top_kind in NON_USER_KINDS:
        return None

    # 'fix' and 'bug' are reported together; anything unlisted is 'other'.
    category_for_kind = {'feat': 'feat', 'fix': 'bug', 'bug': 'bug', 'perf': 'perf'}
    return category_for_kind.get(top_kind, 'other')
def build_section_for_range(version: str, tag: str, prev_tag: Optional[str]) -> Optional[str]:  # noqa
    """
    Render the change log section for ``version`` from git history.

    The commits examined are ``prev_tag..tag``, or everything reachable from
    ``tag`` when there is no previous tag.  Only commits that identify a PR
    (a 'Merge pull request #N' merge or a '... (#N)' squash subject) are
    used.  Each PR is filed under the category of its most significant
    component commit; PRs for which no category is produced are left out.

    Returns the Markdown text of the section, or None when the range has no
    PRs or none of them end up in a category.
    """
    revision_range = f'{prev_tag}..{tag}' if prev_tag else tag

    # %x01 separates the fields of a record; %x00 ends each record.
    log_format = '%H%x01%s%x01%b%x00'
    raw_log = run_git('log', f'--pretty=format:{log_format}', revision_range)

    titles_by_pr: Dict[int, str] = {}
    merge_sha_by_pr: Dict[int, str] = {}
    squash_shas_by_pr: Dict[int, Set[str]] = defaultdict(set)

    for record in raw_log.split('\x00'):
        record = record.strip()
        if not record:
            continue
        fields = record.split('\x01', 2)
        if len(fields) < 2:
            continue
        sha = fields[0]
        subject = fields[1].strip()
        body = fields[2] if len(fields) > 2 else ''

        merged = extract_pr_from_merge(subject, body)
        if merged:
            number, pr_title = merged
            merge_sha_by_pr[number] = sha
            # The first title seen for a PR is the one that is kept.
            titles_by_pr.setdefault(number, pr_title)
            continue

        squashed = extract_pr_from_squash(subject)
        if squashed:
            number, pr_title = squashed
            squash_shas_by_pr[number].add(sha)
            titles_by_pr.setdefault(number, pr_title)

    if not (titles_by_pr or merge_sha_by_pr or squash_shas_by_pr):
        return None

    subject_cache: Dict[str, str] = {}
    lines_by_category: Dict[str, List[str]] = {key: [] for key in CATEGORY_HEADINGS}
    all_prs: Set[int] = (
        set(titles_by_pr) | set(merge_sha_by_pr) | set(squash_shas_by_pr)
    )

    for number in sorted(all_prs):
        shas: Set[str] = set()
        if number in merge_sha_by_pr:
            shas.update(get_commits_for_merge(merge_sha_by_pr[number]))
        if number in squash_shas_by_pr:
            shas.update(squash_shas_by_pr[number])
        if not shas:
            continue

        category = categorize_pr(number, shas, subject_cache)
        if category is None:
            continue

        shown_title = strip_known_prefix(titles_by_pr.get(number, f'PR #{number}'))
        lines_by_category[category].append(
            f'- {shown_title} ([#{number}](../../pull/{number}))')

    if not any(lines_by_category.values()):
        return None

    chunks: List[str] = [f'## Version {version}\n']
    for key in ('feat', 'bug', 'perf', 'other'):
        bullet_lines = lines_by_category[key]
        if bullet_lines:
            heading = CATEGORY_HEADINGS[key]
            # The trailing '' leaves a blank line after each category.
            chunks += [f'### {heading}\n', *bullet_lines, '']
    chunks.append('')
    return '\n'.join(chunks)
def main() -> None:
    """Add sections to CHANGELOG.md for every tag newer than its latest entry.

    Steps:
    1. Read the most recent version recorded in the change log (if any).
    2. Collect the semantic-version tags newer than that version (all tags
       when the change log records none).
    3. Build a section per pending tag, newest first, from the git history
       between it and the tag before it.
    4. Insert the new sections directly under the file's '# ...' title line,
       or at the very top when there is no title line; an empty or missing
       file is started with a '# GeoJS Change Log' title.

    Exactly one blank line is written between the title, each new section,
    and the existing content.  Progress is reported on stdout.
    """
    latest_in_file = find_latest_version_in_changelog(CHANGELOG_PATH)
    print(f'Latest version in changelog: {latest_in_file}')
    tags = get_semver_tags()
    if not tags:
        print('No semantic-version tags found.')
        return
    if latest_in_file:
        latest_tuple = parse_semver(latest_in_file)
        pending = [(v, t) for v, t in tags if parse_semver(v) > latest_tuple]
    else:
        pending = tags[:]
    if not pending:
        print('No newer tags than changelog.')
        return

    # Position of each version in the ascending tag list, used to find the
    # tag that immediately precedes it.
    version_to_index = {v: i for i, (v, _) in enumerate(tags)}
    pending_sorted = sorted(pending, key=lambda vt: parse_semver(vt[0]), reverse=True)
    new_sections: List[str] = []
    for version, tag in pending_sorted:
        idx = version_to_index[version]
        prev_tag = tags[idx - 1][1] if idx > 0 else None
        print(f'Building section for {version} (tag {tag}, prev {prev_tag})')
        section = build_section_for_range(version, tag, prev_tag)
        if section:
            new_sections.append(section)
        else:
            print(f'  Skipped {version} (no user-visible PR changes)')
    if not new_sections:
        print('No sections to add.')
        return

    # Sections already end with a blank line, so normalise their trailing
    # newlines before joining; otherwise consecutive sections would be
    # separated by two blank lines.
    new_text = '\n\n'.join(s.rstrip('\n') for s in new_sections) + '\n\n'

    if os.path.exists(CHANGELOG_PATH):
        with open(CHANGELOG_PATH, 'r', encoding='utf-8') as f:
            old_content = f.read()
    else:
        old_content = ''
    if old_content.strip():
        # Capture only the title line; the newlines after it are consumed by
        # the match but re-emitted as exactly one blank line, so repeated runs
        # do not accumulate blank lines under the title.
        m = re.match(r'^(# .*)\n+', old_content)
        if m:
            title_line = m.group(1)
            rest = old_content[m.end():]
            updated = f'{title_line}\n\n{new_text}{rest}'
        else:
            updated = new_text + old_content
    else:
        updated = '# GeoJS Change Log\n\n' + new_text
    with open(CHANGELOG_PATH, 'w', encoding='utf-8') as f:
        f.write(updated)
    print(f'Updated {CHANGELOG_PATH} with {len(new_sections)} new version(s).')


if __name__ == '__main__':
    main()