Skip to content

Commit 25a30ec

Browse files
committed
Update README and add support for cleaning discussions from a specified category
1 parent 6ed2cab commit 25a30ec

File tree

2 files changed

+40
-12
lines changed

2 files changed

+40
-12
lines changed

utils/stackoverflow/README.md

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -86,16 +86,18 @@ A script that helps migrate Stack Overflow content to GitHub Discussions. It cre
8686
### Requirements
8787
* Python 3.x
8888
* Dependencies listed in requirements.txt
89-
* GitHub Personal Access Token with repo permissions
89+
* GitHub App with appropriate permissions (Contents, Discussions, Metadata)
9090

9191
### Setup
9292
1. Install the required dependencies:
9393
```
9494
pip install -r requirements.txt
9595
```
96-
2. Set your GitHub token as an environment variable:
96+
2. Set up GitHub App authentication by setting these environment variables:
9797
```
98-
export GITHUB_TOKEN=your_github_personal_access_token
98+
export GHD_INSTALLATION_ID=your_installation_id
99+
export GHD_APP_ID=your_github_app_id
100+
export GHD_PRIVATE_KEY=/path/to/your/private-key.pem
99101
```
100102

101103
### Usage
@@ -112,12 +114,24 @@ python populate_discussion.py --repo OWNER/REPO --category CATEGORY_NAME [option
112114
- `--image-folder`: Path to local folder containing images (default: discussion_images_temp)
113115
- `--clean`: Delete all discussions, comments, and labels before import
114116
- `--clean-only`: Delete all discussions, comments, and labels, then exit
117+
- `--clean-category`: Used together with `--category` and either `--clean` or `--clean-only` to delete only the discussions in the specified category
115118

116119
#### Example
117120
```
118121
python populate_discussion.py --repo bcgov/developer-experience-team --category "Q&A" --limit 10
119122
```
120123

124+
#### Clean Operations
125+
To clean discussions in a specific category before importing:
126+
```
127+
python populate_discussion.py --repo bcgov/developer-experience-team --category "Q&A" --clean --clean-category
128+
```
129+
130+
To clean all discussions and exit:
131+
```
132+
python populate_discussion.py --repo bcgov/developer-experience-team --category "Q&A" --clean-only
133+
```
134+
121135
### Features
122136
- Converts Stack Overflow questions and answers to GitHub Discussions
123137
- Creates GitHub labels based on Stack Overflow tags

utils/stackoverflow/populate_discussion.py

Lines changed: 23 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -256,16 +256,24 @@ def find_discussion_by_title(token, owner: str, name: str, title: str):
256256
return d['number'], d['id']
257257
return None, None
258258

259-
def clean_repo_discussions(token, owner: str, name: str):
260-
"""Delete all discussions, their comments, and remove all labels from the repo."""
261-
logger.warning("Cleaning all discussions, comments, and labels from the repository!")
259+
def clean_repo_discussions(token, owner: str, name: str, category_name: str = None):
260+
"""Delete all discussions, their comments, and remove all labels from the repo. Optionally filter by category."""
261+
262+
category_id = get_category_id(token, owner, name, category_name) if category_name else None
263+
264+
if not category_id:
265+
logger.warning("Cleaning all discussions, comments, and labels from the repository!")
266+
else:
267+
logger.warning(f"Cleaning discussions in category '{category_name}', comments, and labels from the repository!")
268+
269+
262270
has_next_page = True
263271
end_cursor = None
264272
while has_next_page:
265273
query = """
266-
query($owner: String!, $name: String!, $after: String) {
274+
query($owner: String!, $name: String!, $after: String, $categoryId: ID) {
267275
repository(owner: $owner, name: $name) {
268-
discussions(first: 50, after: $after) {
276+
discussions(first: 50, after: $after, categoryId: $categoryId) {
269277
nodes {
270278
id
271279
number
@@ -289,7 +297,7 @@ def clean_repo_discussions(token, owner: str, name: str):
289297
}
290298
}
291299
"""
292-
variables = {'owner': owner, 'name': name, 'after': end_cursor}
300+
variables = {'owner': owner, 'name': name, 'after': end_cursor, 'categoryId': category_id}
293301
data = github_graphql_request(token, query, variables)
294302
repo = data['repository']
295303
discussions = repo['discussions']['nodes']
@@ -396,8 +404,9 @@ def main():
396404
parser.add_argument('--limit', type=int, help='Limit number of questions to process')
397405
parser.add_argument('--image-folder', default='discussion_images_temp', help='Path to local folder containing images')
398406
group = parser.add_mutually_exclusive_group()
399-
group.add_argument('--clean', action='store_true', help='Delete all discussions, comments, and labels before import')
400-
group.add_argument('--clean-only', action='store_true', help='Delete all discussions, comments, and labels, then exit')
407+
group.add_argument('--clean', action='store_true', help='Delete all discussions, comments, and labels in the repo before import')
408+
group.add_argument('--clean-only', action='store_true', help='Delete all discussions, comments, and labels, in the repo then exit')
409+
parser.add_argument('--clean-category', action='store_true', help='Used with --category and --clean or --clean-only to delete all discussions, comments, and labels in the specified category')
401410
args = parser.parse_args()
402411

403412

@@ -411,6 +420,9 @@ def main():
411420

412421
if not installation_id.isdigit() or not app_id.isdigit():
413422
raise ValueError("INSTALLATION_ID and APP_ID must be numeric")
423+
424+
if args.clean_category and (not args.category or not (args.clean or args.clean_only)):
425+
raise ValueError("When using --clean-category, you must also specify --category and either --clean or --clean-only")
414426

415427
with open(private_key, "r") as key_file:
416428
private_key = key_file.read()
@@ -433,7 +445,7 @@ def main():
433445

434446
# Clean repository discussions, comments, and labels if --clean or --clean-only flag is set
435447
if args.clean or args.clean_only:
436-
clean_repo_discussions(token, owner, name)
448+
clean_repo_discussions(token, owner, name, args.category if args.clean_category else None)
437449
if args.clean_only:
438450
logger.info('Cleanup complete. Exiting due to --clean-only flag.')
439451
return
@@ -453,6 +465,8 @@ def main():
453465
# Get discussion category ID
454466
category_id = get_category_id(token, owner, name, args.category)
455467

468+
logger.info(f"category_id for '{args.category}': {category_id}")
469+
456470
# Process questions with limit if specified
457471
if args.limit:
458472
questions = questions[:args.limit]

0 commit comments

Comments
 (0)