新增 release-notes.py，用于爬取 GitHub 上的 release note 信息，使用了 ChatGPT 辅助。
parent
79f9fd9e60
commit
cce0894f7a
|
@ -0,0 +1,70 @@
|
|||
import argparse
|
||||
import re
|
||||
from bs4 import BeautifulSoup
|
||||
import requests
|
||||
|
||||
|
||||
def parse_github_repo(github_url):
    """Extract the owner and repository name from a GitHub repository URL.

    Only the first two path segments are used, so URLs that point deeper
    into the repository (e.g. ``.../owner/repo/releases``) are accepted.
    A query string, a fragment, or a trailing ``.git`` clone suffix is not
    treated as part of the repository name.

    Args:
        github_url: URL such as ``https://github.com/owner/repo``.

    Returns:
        A ``(repo_owner, repo_name)`` tuple of strings.

    Raises:
        ValueError: If the URL does not look like a GitHub repository URL.
    """
    # The dot is escaped so only the real host matches, and the segments stop
    # at "/", "?", "#" or whitespace so query strings and fragments are not
    # swallowed into the repository name.
    pattern = r"https?://(?:www\.)?github\.com/([^/?#\s]+)/([^/?#\s]+)"
    match = re.match(pattern, github_url)
    if not match:
        raise ValueError("Invalid GitHub URL")

    repo_owner, repo_name = match.groups()

    # Clone URLs end in ".git", which is not part of the repository name.
    if repo_name.endswith(".git"):
        repo_name = repo_name[: -len(".git")]
    if not repo_name:
        raise ValueError("Invalid GitHub URL")

    return repo_owner, repo_name
|
||||
|
||||
|
||||
def collect_release_notes(github_url):
    """Scrape the release notes of a GitHub repository from its HTML release pages.

    Pages ``releases?page=1``, ``?page=2``, ... are fetched in order until a
    page yields no release headers. Progress is printed to stdout.

    Args:
        github_url: URL of the repository, as accepted by ``parse_github_repo``.

    Returns:
        A list of ``{"name": ..., "body": ...}`` dicts, in the order the
        releases appear on the pages.

    Raises:
        ValueError: If ``github_url`` is rejected by ``parse_github_repo``.
        RuntimeError: If a release page responds with a non-200 status code.
        requests.exceptions.RequestException: On network failures or timeouts.
    """
    # Extract the repository owner and name from the GitHub URL
    repo_owner, repo_name = parse_github_repo(github_url)

    releases = []
    page = 1

    # A single session lets every page request reuse the same connection.
    with requests.Session() as session:
        while True:
            print(f"Processing Page {page}")

            # Make a GET request to the GitHub release page. The timeout keeps
            # a stalled connection from hanging the script indefinitely.
            url = f"https://github.com/{repo_owner}/{repo_name}/releases?page={page}"
            response = session.get(url, timeout=30)

            # Check if the request was successful
            if response.status_code != 200:
                raise RuntimeError(
                    "Failed to retrieve releases from the repository: "
                    f"HTTP {response.status_code} for {url}"
                )

            # Parse the HTML content
            soup = BeautifulSoup(response.text, "html.parser")

            # Find all of the release items on the page
            release_headers = soup.select("#repo-content-turbo-frame span a")
            release_bodies = soup.select(".Box-body > :nth-child(2)")

            # A page without any release header marks the end of the listing.
            if not release_headers:
                break

            # Extract the release information. zip() pairs headers and bodies
            # positionally and stops at the shorter of the two lists.
            releases.extend(
                {"name": header.text, "body": body.text}
                for header, body in zip(release_headers, release_bodies)
            )

            page += 1

    print("done")
    return releases
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Command line: release-notes.py <github_url> [output_file]
    parser = argparse.ArgumentParser()
    parser.add_argument("github_url", help="GitHub repository URL")
    parser.add_argument("output_file", nargs="?", default="", help="Output file name")
    args = parser.parse_args()

    github_url = args.github_url
    _, repo_name = parse_github_repo(github_url)
    # Fall back to "<repo_name>.md" when no output file is given.
    output_file = args.output_file or f"{repo_name}.md"

    releases = collect_release_notes(github_url)

    # Write the release notes to a file. The encoding is explicit because
    # release notes routinely contain non-ASCII text (emoji, CJK), which the
    # platform default encoding may be unable to represent.
    with open(output_file, "w", encoding="utf-8") as file:
        for release in releases:
            file.write("# " + release["name"] + "\n")
            file.write(release["body"] + "\n")
|
Loading…
Reference in New Issue