新增 release-notes.py,用于爬取 GitHub 上的 release note 信息,使用了 ChatGPT 辅助。

main
greatbody 2023-02-14 12:05:09 +08:00
parent 79f9fd9e60
commit cce0894f7a
No known key found for this signature in database
GPG Key ID: 01CEB6267272A9A5
1 changed files with 70 additions and 0 deletions

70
python3/release-notes.py Normal file
View File

@@ -0,0 +1,70 @@
import argparse
import re
from bs4 import BeautifulSoup
import requests
def parse_github_repo(github_url):
    """Extract the repository owner and name from a GitHub URL.

    Only the leading ``https://github.com/<owner>/<repo>`` part is examined.
    Anything after the repository segment (extra path components such as
    ``/releases``, a query string, or a fragment) is ignored, and a trailing
    ``.git`` clone suffix is dropped from the repository name.

    Args:
        github_url: URL such as ``https://github.com/owner/repo``.

    Returns:
        A ``(repo_owner, repo_name)`` tuple of strings.

    Raises:
        ValueError: If the URL does not start with a GitHub repository path.
    """
    # The dots are escaped so that only the literal host "github.com" matches,
    # and both segments stop at '/', '?', '#' or whitespace so that query
    # strings and fragments never leak into the repository name.
    pattern = r"https://github\.com/([^/?#\s]+)/([^/?#\s]+)"
    match = re.match(pattern, github_url)
    if not match:
        raise ValueError("Invalid GitHub URL")
    repo_owner, repo_name = match.groups()
    # Clone URLs end in ".git"; that suffix is not part of the repository name.
    if repo_name.endswith(".git"):
        repo_name = repo_name[: -len(".git")]
    if not repo_name:
        raise ValueError("Invalid GitHub URL")
    return repo_owner, repo_name
def collect_release_notes(github_url, timeout=30):
    """Scrape the release notes of a GitHub repository from its HTML pages.

    Requests ``/releases?page=N`` starting at page 1 and keeps going until a
    page yields no release header links.

    Args:
        github_url: Repository URL accepted by ``parse_github_repo``.
        timeout: Seconds handed to ``requests.get`` for each page. Without a
            timeout a stalled connection would block the loop indefinitely.

    Returns:
        A list of ``{"name": ..., "body": ...}`` dicts in the order the
        releases appear on the pages.

    Raises:
        ValueError: If ``github_url`` is not a GitHub repository URL.
        Exception: If a page responds with a status code other than 200.
        requests.exceptions.RequestException: If the HTTP request itself
            fails (including exceeding ``timeout``).
    """
    releases = []
    page = 1
    # Extract the repository owner and name from the GitHub URL
    repo_owner, repo_name = parse_github_repo(github_url)
    while True:
        print(f"Processing Page {page}")
        # Make a GET request to the GitHub release page
        url = f"https://github.com/{repo_owner}/{repo_name}/releases?page={page}"
        response = requests.get(url, timeout=timeout)
        # Check if the request was successful
        if response.status_code != 200:
            raise Exception(f"Failed to retrieve releases from the repository: {response.text}")
        # Parse the HTML content
        soup = BeautifulSoup(response.text, "html.parser")
        # Find all of the release items on the page.
        # NOTE(review): these selectors depend on GitHub's page markup at the
        # time of writing; confirm they still match if results come back empty.
        release_headers = soup.select("#repo-content-turbo-frame span a")
        release_bodies = soup.select(".Box-body > :nth-child(2)")
        # A page without any header link ends the pagination.
        if not release_headers:
            break
        # Headers and bodies are paired purely by position; zip() stops at the
        # shorter of the two lists.
        for header, body in zip(release_headers, release_bodies):
            releases.append({"name": header.text, "body": body.text})
        page += 1
    print("done")
    return releases
if __name__ == "__main__":
    # Command line: <github_url> [output_file]
    parser = argparse.ArgumentParser()
    parser.add_argument("github_url", help="GitHub repository URL")
    parser.add_argument("output_file", nargs="?", default="", help="Output file name")
    args = parser.parse_args()
    github_url = args.github_url
    # When no output file is given, fall back to "<repo_name>.md".
    _, repo_name = parse_github_repo(github_url)
    output_file = args.output_file or f"{repo_name}.md"
    releases = collect_release_notes(github_url)
    # Write the release notes to a file. UTF-8 is set explicitly because
    # release notes often contain non-ASCII text (emoji, CJK) that the
    # platform default encoding may be unable to encode.
    with open(output_file, "w", encoding="utf-8") as file:
        for release in releases:
            file.write("# " + release["name"] + "\n")
            file.write(release["body"] + "\n")