Skip to content

github_client

GitHub client for automating PR creation and git operations.

GitHubSyncClient

Client for syncing model references to GitHub legacy repositories via PRs.

Source code in src/horde_model_reference/sync/github_client.py
  31
  32
  33
  34
  35
  36
  37
  38
  39
  40
  41
  42
  43
  44
  45
  46
  47
  48
  49
  50
  51
  52
  53
  54
  55
  56
  57
  58
  59
  60
  61
  62
  63
  64
  65
  66
  67
  68
  69
  70
  71
  72
  73
  74
  75
  76
  77
  78
  79
  80
  81
  82
  83
  84
  85
  86
  87
  88
  89
  90
  91
  92
  93
  94
  95
  96
  97
  98
  99
 100
 101
 102
 103
 104
 105
 106
 107
 108
 109
 110
 111
 112
 113
 114
 115
 116
 117
 118
 119
 120
 121
 122
 123
 124
 125
 126
 127
 128
 129
 130
 131
 132
 133
 134
 135
 136
 137
 138
 139
 140
 141
 142
 143
 144
 145
 146
 147
 148
 149
 150
 151
 152
 153
 154
 155
 156
 157
 158
 159
 160
 161
 162
 163
 164
 165
 166
 167
 168
 169
 170
 171
 172
 173
 174
 175
 176
 177
 178
 179
 180
 181
 182
 183
 184
 185
 186
 187
 188
 189
 190
 191
 192
 193
 194
 195
 196
 197
 198
 199
 200
 201
 202
 203
 204
 205
 206
 207
 208
 209
 210
 211
 212
 213
 214
 215
 216
 217
 218
 219
 220
 221
 222
 223
 224
 225
 226
 227
 228
 229
 230
 231
 232
 233
 234
 235
 236
 237
 238
 239
 240
 241
 242
 243
 244
 245
 246
 247
 248
 249
 250
 251
 252
 253
 254
 255
 256
 257
 258
 259
 260
 261
 262
 263
 264
 265
 266
 267
 268
 269
 270
 271
 272
 273
 274
 275
 276
 277
 278
 279
 280
 281
 282
 283
 284
 285
 286
 287
 288
 289
 290
 291
 292
 293
 294
 295
 296
 297
 298
 299
 300
 301
 302
 303
 304
 305
 306
 307
 308
 309
 310
 311
 312
 313
 314
 315
 316
 317
 318
 319
 320
 321
 322
 323
 324
 325
 326
 327
 328
 329
 330
 331
 332
 333
 334
 335
 336
 337
 338
 339
 340
 341
 342
 343
 344
 345
 346
 347
 348
 349
 350
 351
 352
 353
 354
 355
 356
 357
 358
 359
 360
 361
 362
 363
 364
 365
 366
 367
 368
 369
 370
 371
 372
 373
 374
 375
 376
 377
 378
 379
 380
 381
 382
 383
 384
 385
 386
 387
 388
 389
 390
 391
 392
 393
 394
 395
 396
 397
 398
 399
 400
 401
 402
 403
 404
 405
 406
 407
 408
 409
 410
 411
 412
 413
 414
 415
 416
 417
 418
 419
 420
 421
 422
 423
 424
 425
 426
 427
 428
 429
 430
 431
 432
 433
 434
 435
 436
 437
 438
 439
 440
 441
 442
 443
 444
 445
 446
 447
 448
 449
 450
 451
 452
 453
 454
 455
 456
 457
 458
 459
 460
 461
 462
 463
 464
 465
 466
 467
 468
 469
 470
 471
 472
 473
 474
 475
 476
 477
 478
 479
 480
 481
 482
 483
 484
 485
 486
 487
 488
 489
 490
 491
 492
 493
 494
 495
 496
 497
 498
 499
 500
 501
 502
 503
 504
 505
 506
 507
 508
 509
 510
 511
 512
 513
 514
 515
 516
 517
 518
 519
 520
 521
 522
 523
 524
 525
 526
 527
 528
 529
 530
 531
 532
 533
 534
 535
 536
 537
 538
 539
 540
 541
 542
 543
 544
 545
 546
 547
 548
 549
 550
 551
 552
 553
 554
 555
 556
 557
 558
 559
 560
 561
 562
 563
 564
 565
 566
 567
 568
 569
 570
 571
 572
 573
 574
 575
 576
 577
 578
 579
 580
 581
 582
 583
 584
 585
 586
 587
 588
 589
 590
 591
 592
 593
 594
 595
 596
 597
 598
 599
 600
 601
 602
 603
 604
 605
 606
 607
 608
 609
 610
 611
 612
 613
 614
 615
 616
 617
 618
 619
 620
 621
 622
 623
 624
 625
 626
 627
 628
 629
 630
 631
 632
 633
 634
 635
 636
 637
 638
 639
 640
 641
 642
 643
 644
 645
 646
 647
 648
 649
 650
 651
 652
 653
 654
 655
 656
 657
 658
 659
 660
 661
 662
 663
 664
 665
 666
 667
 668
 669
 670
 671
 672
 673
 674
 675
 676
 677
 678
 679
 680
 681
 682
 683
 684
 685
 686
 687
 688
 689
 690
 691
 692
 693
 694
 695
 696
 697
 698
 699
 700
 701
 702
 703
 704
 705
 706
 707
 708
 709
 710
 711
 712
 713
 714
 715
 716
 717
 718
 719
 720
 721
 722
 723
 724
 725
 726
 727
 728
 729
 730
 731
 732
 733
 734
 735
 736
 737
 738
 739
 740
 741
 742
 743
 744
 745
 746
 747
 748
 749
 750
 751
 752
 753
 754
 755
 756
 757
 758
 759
 760
 761
 762
 763
 764
 765
 766
 767
 768
 769
 770
 771
 772
 773
 774
 775
 776
 777
 778
 779
 780
 781
 782
 783
 784
 785
 786
 787
 788
 789
 790
 791
 792
 793
 794
 795
 796
 797
 798
 799
 800
 801
 802
 803
 804
 805
 806
 807
 808
 809
 810
 811
 812
 813
 814
 815
 816
 817
 818
 819
 820
 821
 822
 823
 824
 825
 826
 827
 828
 829
 830
 831
 832
 833
 834
 835
 836
 837
 838
 839
 840
 841
 842
 843
 844
 845
 846
 847
 848
 849
 850
 851
 852
 853
 854
 855
 856
 857
 858
 859
 860
 861
 862
 863
 864
 865
 866
 867
 868
 869
 870
 871
 872
 873
 874
 875
 876
 877
 878
 879
 880
 881
 882
 883
 884
 885
 886
 887
 888
 889
 890
 891
 892
 893
 894
 895
 896
 897
 898
 899
 900
 901
 902
 903
 904
 905
 906
 907
 908
 909
 910
 911
 912
 913
 914
 915
 916
 917
 918
 919
 920
 921
 922
 923
 924
 925
 926
 927
 928
 929
 930
 931
 932
 933
 934
 935
 936
 937
 938
 939
 940
 941
 942
 943
 944
 945
 946
 947
 948
 949
 950
 951
 952
 953
 954
 955
 956
 957
 958
 959
 960
 961
 962
 963
 964
 965
 966
 967
 968
 969
 970
 971
 972
 973
 974
 975
 976
 977
 978
 979
 980
 981
 982
 983
 984
 985
 986
 987
 988
 989
 990
 991
 992
 993
 994
 995
 996
 997
 998
 999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333
1334
1335
1336
1337
1338
1339
1340
1341
1342
1343
1344
1345
1346
1347
1348
1349
1350
1351
1352
1353
1354
1355
1356
1357
1358
1359
1360
1361
1362
1363
1364
1365
1366
1367
1368
1369
1370
1371
1372
1373
1374
1375
1376
1377
1378
1379
1380
1381
1382
1383
1384
1385
1386
1387
1388
1389
1390
1391
1392
1393
1394
1395
1396
1397
1398
1399
1400
1401
1402
1403
1404
1405
class GitHubSyncClient:
    """Client for syncing model references to GitHub legacy repositories via PRs."""

    def __init__(
        self,
    ) -> None:
        """Load sync settings, reset working state, and authenticate against GitHub.

        GitHub App installation authentication is used when
        ``github_app_settings.is_configured()`` reports true; otherwise the
        token from the sync settings is used.

        Raises:
            RuntimeError: If neither authentication method is configured, or if
                GitHub App authentication fails.

        """
        from horde_model_reference.sync.config import github_sync_settings

        self.settings = github_sync_settings

        # Working-copy bookkeeping; filled in once a repository is cloned or reused.
        self._temp_dir: Path | None = None
        self._current_repo: Repo | None = None
        self._is_persistent_dir: bool = False
        self._original_branch: str | None = None

        self._github_client: Github | None
        self._installation_auth: Auth.AppInstallationAuth | None = None

        # Preferred path: GitHub App installation credentials.
        if github_app_settings.is_configured():
            logger.info("Using GitHub App installation authentication")
            client, installation_auth = self._create_app_authenticated_client()
            self._github_client = client
            self._installation_auth = installation_auth
            return

        # Fallback path: a plain token from the sync settings.
        token = self.settings.github_token
        if not token:
            raise RuntimeError("No GitHub authentication method configured")

        logger.info("Using GitHub token authentication")
        self._github_client = Github(auth=Auth.Token(token))

    def _create_app_authenticated_client(self) -> tuple[Github, Auth.AppInstallationAuth]:
        """Build a ``Github`` client backed by GitHub App installation credentials.

        Returns:
            A ``(client, installation_auth)`` pair. The auth object is returned
            alongside the client so the caller can keep a reference to it.

        Raises:
            RuntimeError: If the GitHub App settings are not configured, or if any
                step of building the credentials fails (the original exception is
                chained as the cause).

        """
        if not github_app_settings.is_configured():
            raise RuntimeError("GitHub App settings are not fully configured")

        try:
            key_material = github_app_settings.get_private_key_content()

            app_id = github_app_settings.github_app_id
            installation_id = github_app_settings.github_installation_id

            # Narrow the optional ids for the type checker; is_configured() passed above.
            assert app_id is not None
            assert installation_id is not None

            application_auth = Auth.AppAuth(app_id=app_id, private_key=key_material)

            # No token_permissions are passed, so the installation's permissions are not narrowed here.
            installation_auth = application_auth.get_installation_auth(installation_id=installation_id)

            logger.debug(
                f"Created GitHub App installation auth for app_id={app_id}, "
                f"installation_id={installation_id}"
            )

            return Github(auth=installation_auth), installation_auth

        except Exception as exc:
            logger.error(f"Failed to create GitHub App authenticated client: {exc}")
            raise RuntimeError(f"GitHub App authentication failed: {exc}") from exc

    def __enter__(self) -> GitHubSyncClient:
        """Context manager entry - returns this client unchanged.

        Nothing is created or acquired here; a working directory only comes into
        existence later, when a repository is cloned.
        """
        return self

    def __exit__(
        self,
        exc_type: type[BaseException] | None,
        exc_val: BaseException | None,
        exc_tb: TracebackType | None,
    ) -> None:
        """Context manager exit - delegates to ``cleanup()``.

        The exception arguments are ignored and ``None`` is returned, so an
        exception raised inside the ``with`` block is not suppressed.
        """
        self.cleanup()

    def cleanup(self) -> None:
        """Clean up resources, preserving persistent directories.

        SIGNIFICANCE:
        - Persistent target directories must NOT be deleted (reused across runs)
        - Only temp directories (self._is_persistent_dir == False) should be removed
        """
        if self._current_repo:
            self._current_repo.close()
            self._current_repo = None

        if self._temp_dir and self._temp_dir.exists():
            if self._is_persistent_dir:
                logger.debug(f"Preserving persistent clone at {self._temp_dir}")
            else:
                logger.debug(f"Cleaning up temporary directory: {self._temp_dir}")
                try:
                    shutil.rmtree(self._temp_dir)
                except Exception as e:
                    logger.warning(f"Failed to clean up temporary directory {self._temp_dir}: {e}")

        self._temp_dir = None
        self._is_persistent_dir = False
        self._original_branch = None

    @contextmanager
    def _branch_operation(self) -> Generator[None]:
        """Run a block of git work and afterwards return to the starting branch.

        The name of the active branch is recorded in ``self._original_branch``
        before the block runs. When the block finishes - normally or through an
        exception - the recorded branch is checked out again if the repository
        has moved off it. A failure during that restore is logged as a warning
        and does not replace any exception raised by the block.

        Usage:
            with self._branch_operation():
                # Create and work on temporary branch
                # Push changes and create PR
                # Branch will be restored automatically

        Raises:
            RuntimeError: If no repository is open, or the active branch cannot
                be determined.
        """
        if not self._current_repo:
            raise RuntimeError("No repository available for branch operation")

        try:
            starting_branch = self._current_repo.active_branch.name
            self._original_branch = starting_branch
            logger.debug(f"Saved original branch: {starting_branch}")
        except Exception as exc:
            raise RuntimeError(f"Failed to determine current branch: {exc}") from exc

        try:
            yield
        finally:
            # Re-read the instance state: the wrapped block may have changed it.
            repo = self._current_repo
            saved_branch = self._original_branch
            if repo and saved_branch:
                try:
                    if repo.active_branch.name != saved_branch:
                        logger.debug(f"Restoring original branch: {saved_branch}")
                        repo.git.checkout(saved_branch)
                except Exception as exc:
                    logger.warning(f"Failed to restore original branch {saved_branch}: {exc}")

    def sync_category_to_github(
        self,
        *,
        category: MODEL_REFERENCE_CATEGORY,
        diff: ModelReferenceDiff,
        primary_data: dict[str, dict[str, Any]],
        text_generation_artifacts: TextGenerationSyncArtifacts | None = None,
    ) -> str | None:
        """Open a pull request that brings one category's GitHub file up to date.

        The method returns early, without touching git, when the diff is empty,
        when it is below ``settings.min_changes_threshold``, or when
        ``settings.dry_run`` is set. Otherwise it clones (or reuses) the
        repository for the category, creates a sync branch, updates the file,
        commits, pushes and creates the PR. ``cleanup()`` always runs at the end.

        Args:
            category (MODEL_REFERENCE_CATEGORY): The category to sync.
            diff: The detected differences for this category.
            primary_data: The complete PRIMARY data for this category (legacy format).
            text_generation_artifacts: Optional pre-computed artifacts, forwarded
                unchanged to ``_update_category_file``.

        Returns:
            The PR URL if one was created; ``None`` if the sync was skipped, was a
            dry run, or the commit step reported no file changes.

        Raises:
            Exception: Any failure during the sync is logged and re-raised.

        """
        # --- Cheap exits: nothing to do, too little to do, or dry run. ---
        if not diff.has_changes():
            logger.info(f"No changes detected for {category}, skipping PR creation")
            return None

        threshold = self.settings.min_changes_threshold
        if diff.total_changes() < threshold:
            logger.info(
                f"Only {diff.total_changes()} changes for {category} "
                f"(threshold: {threshold}), skipping PR"
            )
            return None

        if self.settings.dry_run:
            logger.info(f"[DRY RUN] Would create PR for {category} with {diff.total_changes()} changes")
            logger.info(f"[DRY RUN] Summary:\n{diff.summary()}")
            return None

        # --- Real sync. ---
        try:
            repo_settings = horde_model_reference_settings.get_repo_by_category(category)
            repo_owner_and_name = repo_settings.repo_owner_and_name
            logger.info(f"Starting sync for {category} to {repo_owner_and_name}")

            self._clone_repository(repo_settings)

            with self._branch_operation():
                branch_name = self._create_sync_branch(category)
                self._update_category_file(
                    category,
                    primary_data,
                    text_generation_artifacts=text_generation_artifacts,
                )
                if not self._commit_changes(category, diff):
                    # The comparator saw differences but the written file did not change.
                    logger.warning(
                        f"Skipping PR creation for {category}: comparator detected "
                        f"{diff.total_changes()} changes but no actual file changes were produced."
                    )
                    return None
                self._push_branch(branch_name)
                pr_url = self._create_pull_request(
                    category,
                    diff,
                    repo_owner_and_name,
                    branch_name,
                    repo_settings,
                )

            logger.success(f"Successfully created PR for {category}: {pr_url}")
            return pr_url

        except Exception as exc:
            logger.error(f"Failed to sync {category} to GitHub: {exc}")
            raise
        finally:
            self.cleanup()

    def sync_multiple_categories_to_github(
        self,
        *,
        repo_name: str,
        categories_data: dict[
            MODEL_REFERENCE_CATEGORY,
            tuple[ModelReferenceDiff, dict[str, dict[str, Any]], TextGenerationSyncArtifacts | None],
        ],
    ) -> str | None:
        """Sync multiple categories to GitHub in a single PR.

        The repository to clone is resolved from the first category in
        ``categories_data`` via ``get_repo_by_category``; ``repo_name`` is used for
        logging and is passed on to the PR-creation step. ``cleanup()`` always
        runs at the end of a real (non-skipped) sync.

        Args:
            repo_name: Repository in 'owner/repo' format.
            categories_data: Dict mapping categories to (diff, primary_data, artifacts) tuples.

        Returns:
            The PR URL if created; ``None`` if ``categories_data`` is empty, the
            total change count is below the threshold, this is a dry run, or the
            commit step reported no file changes.

        Raises:
            Exception: Any failure during the sync is logged and re-raised.

        """
        # Guard the empty case explicitly: with a threshold of 0 it would otherwise
        # reach next(iter(...)) below and leak a StopIteration to the caller.
        if not categories_data:
            logger.info("No categories supplied for multi-category sync, skipping PR")
            return None

        total_changes = sum(diff.total_changes() for diff, _, _ in categories_data.values())

        if total_changes < self.settings.min_changes_threshold:
            logger.info(
                f"Only {total_changes} total changes across categories "
                f"(threshold: {self.settings.min_changes_threshold}), skipping PR"
            )
            return None

        if self.settings.dry_run:
            logger.info(
                f"[DRY RUN] Would create PR for {len(categories_data)} categories with {total_changes} changes"
            )
            for category, (diff, _, _) in categories_data.items():
                logger.info(f"[DRY RUN] {category}:\n{diff.summary()}")
            return None

        try:
            logger.info(f"Starting multi-category sync to {repo_name}")

            # All categories are written into one clone; its settings come from the first category.
            first_category = next(iter(categories_data))
            github_repo_settings = horde_model_reference_settings.get_repo_by_category(first_category)

            self._clone_repository(github_repo_settings)

            with self._branch_operation():
                branch_name = self._create_multi_category_sync_branch(list(categories_data.keys()))

                for category, (diff, primary_data, artifacts) in categories_data.items():
                    logger.info(f"Updating {category} with {diff.total_changes()} changes")
                    self._update_category_file(
                        category,
                        primary_data,
                        text_generation_artifacts=artifacts,
                    )

                has_changes = self._commit_multi_category_changes(categories_data)
                if not has_changes:
                    # Reuse the total computed above instead of summing the diffs again.
                    logger.warning(
                        f"Skipping PR creation for multi-category sync: comparator detected "
                        f"{total_changes} changes but no actual file changes were produced."
                    )
                    return None
                self._push_branch(branch_name)
                pr_url = self._create_multi_category_pull_request(
                    categories_data, repo_name, branch_name, github_repo_settings
                )

            logger.success(f"Successfully created multi-category PR: {pr_url}")
            return pr_url

        except Exception as e:
            logger.error(f"Failed to sync multiple categories to GitHub: {e}")
            raise
        finally:
            self.cleanup()

    def _get_target_dir_for_repo(self, github_settings: GithubRepoSettings) -> Path | None:
        """Compute per-repository directory path within target_clone_dir.

        Args:
            github_settings: GitHub repository settings containing owner/name.

        Returns:
            Path to {target_clone_dir}/{owner}/{repo}/ or None if target_clone_dir not configured.

        Example:
            github_settings.owner = "Haidra-Org"
            github_settings.name = "AI-Horde-image-model-reference"
            target_clone_dir = "/path/to/clones"
            Returns: Path("/path/to/clones/Haidra-Org/AI-Horde-image-model-reference")

        """
        if not self.settings.target_clone_dir:
            return None

        base_dir = Path(self.settings.target_clone_dir)
        return base_dir / github_settings.repo_owner_and_name

    def _verify_existing_repo(self, repo_path: Path, expected_github_settings: GithubRepoSettings) -> None:
        """Verify existing directory matches expected repository identity.

        Reads the ``origin`` remote URL of the clone at ``repo_path``, strips any
        credentials from it, and compares the resulting ``owner/repo`` with
        ``expected_github_settings.repo_owner_and_name``. Only the repository
        name is compared; the checked-out branch is not inspected here.

        Args:
            repo_path: Path to existing git repository.
            expected_github_settings: Expected GitHub repository settings.

        Raises:
            RuntimeError: If the directory is not a git repository, cannot be
                opened, or has no readable ``origin`` remote.
            ValueError: If the remote URL cannot be parsed or its owner/repo
                doesn't match the expected value.

        """
        git_dir = repo_path / ".git"
        if not git_dir.exists():
            raise RuntimeError(f"Target directory exists but is not a git repository: {repo_path}")

        try:
            repo = Repo(repo_path)
        except Exception as e:
            raise RuntimeError(f"Failed to open git repository at {repo_path}: {e}") from e

        # Only the remote URL is needed from this handle, so release it right away
        # instead of leaving it open for the rest of the process.
        try:
            try:
                remote_url = repo.remote("origin").url
            except Exception as e:
                raise RuntimeError(f"Failed to get remote URL from repository at {repo_path}: {e}") from e
        finally:
            repo.close()

        # Strip any authentication information before parsing
        clean_url = self._strip_auth_from_url(remote_url)
        actual_repo_name = self._parse_repo_name_from_url(clean_url)
        expected_repo_name = expected_github_settings.repo_owner_and_name

        if actual_repo_name != expected_repo_name:
            raise ValueError(
                f"Repository mismatch - Expected: {expected_repo_name}, Found: {actual_repo_name}. "
                f"Aborting to prevent data corruption."
            )

        logger.info(f"✓ Repository: {actual_repo_name} (matches expected)")

    def _strip_auth_from_url(self, url: str) -> str:
        """Strip authentication information from a git remote URL.

        Args:
            url: Git remote URL that may contain authentication credentials.

        Returns:
            URL with authentication information removed.

        Example:
            "https://token@github.com/owner/repo.git" -> "https://github.com/owner/repo.git"
            "https://user:pass@github.com/owner/repo.git" -> "https://github.com/owner/repo.git"

        """
        parsed = urlparse(url)
        # Handle HTTPS URLs: strip credentials if host is github.com
        if parsed.scheme in ("http", "https") and parsed.hostname and parsed.hostname.lower() == "github.com":
            # Rebuild URL without username/password
            path = parsed.path or ""
            query = f"?{parsed.query}" if parsed.query else ""
            fragment = f"#{parsed.fragment}" if parsed.fragment else ""
            return f"{parsed.scheme}://github.com{path}{query}{fragment}"

        # Handle SSH URL format: git@github.com:owner/repo.git
        if url.startswith("git@github.com:"):
            # Already no credentials other than 'git'
            return url
        return url

    def _parse_repo_name_from_url(self, url: str) -> str:
        """Parse owner/repo from a git remote URL.

        Args:
            url: Git remote URL (https:// or git@ format), should be cleaned of auth first.

        Returns:
            Repository name in 'owner/repo' format.

        Example:
            "https://github.com/Haidra-Org/AI-Horde-image-model-reference.git"
            -> "Haidra-Org/AI-Horde-image-model-reference"

        """
        # HTTPS clone URL: https://github.com/owner/repo.git
        if url.startswith("https://"):
            parsed = urlparse(url)
            if parsed.hostname != "github.com":
                raise ValueError(f"URL hostname must be github.com, got: {parsed.hostname}")
            repo_path = parsed.path.replace(".git", "").strip("/")
            if "/" not in repo_path:
                raise ValueError(f"URL path '{repo_path}' does not match owner/repo format")
            return repo_path

        # SSH clone URL: git@github.com:owner/repo.git
        if url.startswith("git@github.com:"):
            repo_path = url[len("git@github.com:") :].replace(".git", "").strip("/")
            if "/" not in repo_path:
                raise ValueError(f"URL path '{repo_path}' does not match owner/repo format")
            return repo_path

        # Also handle "ssh://git@github.com/owner/repo.git"
        if url.startswith("ssh://"):
            parsed = urlparse(url)
            if parsed.hostname != "github.com":
                raise ValueError(f"URL hostname must be github.com, got: {parsed.hostname}")
            repo_path = parsed.path.replace(".git", "").strip("/")
            if repo_path.startswith("~"):
                repo_path = repo_path.lstrip("~/")
            if "/" not in repo_path:
                raise ValueError(f"URL path '{repo_path}' does not match owner/repo format")
            return repo_path

        raise ValueError(f"Unable to parse repository name from URL: {url}")

    def _check_for_local_changes(self, repo: Repo) -> bool:
        """Report whether the working copy differs from its last commit.

        Args:
            repo: GitPython Repo object.

        Returns:
            The result of ``repo.is_dirty(untracked_files=True)``, i.e. untracked
            files are included in the check.

        """
        has_local_changes = repo.is_dirty(untracked_files=True)
        return has_local_changes

    def _reset_existing_repo(self, repo: Repo, github_settings: GithubRepoSettings) -> None:
        """Reset existing repository to match remote branch state.

        Sequence:
        1. Fetch latest from origin
        2. Check for local changes
        3. If changes exist, list up to 10 of them and prompt on stdin to continue or abort
        4. Hard reset to origin/{branch}
        5. Clean untracked files (``git clean -fdx``)

        Args:
            repo: GitPython Repo object for repository to reset.
            github_settings: GitHub repository settings containing branch name.

        Raises:
            RuntimeError: If the fetch fails, the user declines (or no input is
                available), or the reset/clean fails.

        """
        max_listed = 10

        logger.info("Fetching latest changes from origin...")
        try:
            repo.remotes.origin.fetch()
        except Exception as e:
            raise RuntimeError(f"Failed to fetch from origin: {e}") from e

        if self._check_for_local_changes(repo):
            logger.warning(f"Local changes detected in {repo.working_dir}")

            # Collect into a set: the same path can appear as staged, unstaged and
            # untracked, and the "... and N more" count below must match what is listed.
            changed_files = set()
            if repo.is_dirty():
                changed_files.update(item.a_path for item in repo.index.diff(None) if item.a_path)
                changed_files.update(item.a_path for item in repo.index.diff("HEAD") if item.a_path)
            changed_files.update(repo.untracked_files)

            if changed_files:
                ordered_files = sorted(changed_files)
                logger.warning("Changed files:")
                for file_path in ordered_files[:max_listed]:
                    logger.warning(f"  {file_path}")
                if len(ordered_files) > max_listed:
                    logger.warning(f"  ... and {len(ordered_files) - max_listed} more")

            # Destructive step ahead: require an explicit "y" before discarding anything.
            try:
                response = input("Continue and discard all local changes? (y/n): ").strip().lower()
                if response != "y":
                    raise RuntimeError("Aborting to preserve local changes")
            except EOFError as e:
                # No stdin (e.g. non-interactive run): treat as a refusal.
                raise RuntimeError("Aborting: no user input available") from e

        target_ref = f"origin/{github_settings.branch}"
        logger.info(f"Resetting to {target_ref}...")

        try:
            repo.head.reset(target_ref, index=True, working_tree=True)
            repo.git.clean("-fdx")
            logger.info(f"✓ Reset to {target_ref} and cleaned untracked files")
        except Exception as e:
            raise RuntimeError(f"Failed to reset repository: {e}") from e

    def _clone_repository(self, github_settings: GithubRepoSettings) -> None:
        """Clone or reuse GitHub repository.

        On success ``self._current_repo`` holds the open repository,
        ``self._temp_dir`` its working directory, and ``self._is_persistent_dir``
        records whether ``cleanup()`` should keep that directory.

        SIGNIFICANCE:
        - Uses HordeModelReferenceSettings as single source of truth for GitHub URLs
        - github_settings supplies the owner/name string, branch, and clone URL used here
        - Multiple categories may map to same repo (persistence is per-repo, not per-category)

        Flow:
        1. Determine target directory via _get_target_dir_for_repo(github_settings)
        2. If no persistent dir configured: shallow-clone (depth=1) into a fresh temp directory
        3. If persistent dir exists: verify identity and reset
        4. If persistent dir doesn't exist: full clone to persistent dir

        Args:
            github_settings: GitHub repository settings from HordeModelReferenceSettings.

        Raises:
            RuntimeError: If an existing clone cannot be opened, verified or reset.
            ValueError: If an existing clone points at a different repository.
            Exception: Clone failures are logged and re-raised unchanged.

        """
        target_dir = self._get_target_dir_for_repo(github_settings)
        repo_name = github_settings.repo_owner_and_name

        if target_dir is None:
            # No persistent location configured: work in a throwaway directory,
            # optionally created under settings.sync_temp_dir.
            if self.settings.sync_temp_dir:
                temp_base = Path(self.settings.sync_temp_dir)
                temp_base.mkdir(parents=True, exist_ok=True)
                self._temp_dir = Path(tempfile.mkdtemp(dir=temp_base))
            else:
                self._temp_dir = Path(tempfile.mkdtemp())

            logger.debug(f"Created temporary directory: {self._temp_dir}")
            # Recorded before cloning so cleanup() removes the directory even if the clone fails.
            self._is_persistent_dir = False

            repo_url = github_settings.git_clone_url
            logger.info(f"Cloning {repo_url} to {self._temp_dir}")

            try:
                self._current_repo = Repo.clone_from(
                    url=repo_url,
                    to_path=self._temp_dir,
                    branch=github_settings.branch,
                    depth=1,
                )
                logger.debug(f"Successfully cloned {repo_name}")
            except Exception as e:
                logger.error(f"Failed to clone repository {repo_name}: {e}")
                raise

        elif target_dir.exists():
            # Persistent clone already on disk: make sure it is the right repository before reuse.
            logger.info(f"Found existing clone at {target_dir}, verifying identity...")

            self._verify_existing_repo(target_dir, github_settings)

            try:
                self._current_repo = Repo(target_dir)
            except Exception as e:
                raise RuntimeError(f"Failed to open existing repository at {target_dir}: {e}") from e

            # NOTE(review): the message mentions the branch, but the verification step
            # above only compares the repository name - confirm whether a branch check is intended.
            logger.info(f"Verified repository identity: {repo_name} (branch: {github_settings.branch})")

            self._reset_existing_repo(self._current_repo, github_settings)

            # Only recorded after a successful reset; if the reset raises, _temp_dir is
            # still unset and cleanup() leaves the directory alone.
            self._temp_dir = target_dir
            self._is_persistent_dir = True

        else:
            # Persistent location configured but not yet populated: create the parent and clone into it.
            target_dir.parent.mkdir(parents=True, exist_ok=True)

            repo_url = github_settings.git_clone_url
            logger.info(f"Cloning {repo_url} to {target_dir}...")

            try:
                self._current_repo = Repo.clone_from(
                    url=repo_url,
                    to_path=target_dir,
                    branch=github_settings.branch,
                )
                logger.info(f"Successfully cloned {repo_name} (branch: {github_settings.branch})")
            except Exception as e:
                logger.error(f"Failed to clone repository {repo_name}: {e}")
                raise

            self._temp_dir = target_dir
            self._is_persistent_dir = True

    def _create_sync_branch(self, category: MODEL_REFERENCE_CATEGORY) -> str:
        """Check out a fresh, timestamped branch for a single-category sync.

        The branch is named ``sync/{category}/{YYYYmmdd-HHMMSS}`` using the
        current local time and is created with ``git checkout -b``.

        Args:
            category (MODEL_REFERENCE_CATEGORY): The category being synced.

        Returns:
            The name of the created branch.

        Raises:
            RuntimeError: If no repository is currently open.

        """
        repo = self._current_repo
        if not repo:
            raise RuntimeError("No repository cloned")

        stamp = datetime.now().strftime("%Y%m%d-%H%M%S")
        branch_name = f"sync/{category}/{stamp}"
        logger.debug(f"Creating branch: {branch_name}")

        repo.git.checkout("-b", branch_name)
        return branch_name

    def _generate_backend_prefixes_for_github(
        self,
        grouped_data: dict[str, dict[str, Any]],
    ) -> dict[str, dict[str, Any]]:
        """Generate backend prefix duplicates for GitHub sync (legacy format compatibility).

        This replicates the logic from scripts/legacy_text/convert.py lines 85-87.
        For each base model, creates 3 entries:
        1. Base entry, keyed by the record's own key (e.g., "llama-2-7b")
        2. Aphrodite prefixed, built from the record's key
        3. KoboldCPP prefixed, built from the record's ``model_name`` field
           (falling back to the record's key when that field is absent)

        This is ONLY used for GitHub sync to maintain backward compatibility with
        the legacy GitHub JSON format. Internally, we store grouped data (CSV).

        Args:
            grouped_data: The grouped model data (one entry per base model).

        Returns:
            dict[str, dict[str, Any]]: Model data with backend prefix duplicates.
            Each value is a shallow copy of the source record whose ``name``
            field is set to the key it is stored under.

        """
        from horde_model_reference.meta_consts import TEXT_BACKENDS
        from horde_model_reference.text_backend_names import TEXT_LEGACY_BACKEND_PREFIXES

        aphrodite_prefix = TEXT_LEGACY_BACKEND_PREFIXES[TEXT_BACKENDS.aphrodite]
        koboldcpp_prefix = TEXT_LEGACY_BACKEND_PREFIXES[TEXT_BACKENDS.koboldcpp]

        result: dict[str, dict[str, Any]] = {}

        for name, record in grouped_data.items():
            model_name = record.get("model_name", name)

            # Keys are built directly rather than through str.format templates, so a
            # brace in a prefix or a name can never be misread as a placeholder.
            keys = (
                f"{name}",  # Base entry
                f"{aphrodite_prefix}{name}",  # aphrodite/
                f"{koboldcpp_prefix}{model_name}",  # koboldcpp/
            )

            for key in keys:
                # Shallow copy: only the top-level "name" field differs between duplicates.
                record_copy = record.copy()
                record_copy["name"] = key
                result[key] = record_copy

        logger.debug(
            f"Generated {len(result)} total records from {len(grouped_data)} base models "
            "(including backend prefix duplicates for GitHub)"
        )
        return result

    def _update_category_file(
        self,
        category: MODEL_REFERENCE_CATEGORY,
        primary_data: dict[str, dict[str, Any]],
        *,
        text_generation_artifacts: TextGenerationSyncArtifacts | None = None,
    ) -> None:
        """Update the category file(s) with PRIMARY data.

        For text_generation, produces both models.csv and db.json via the
        CSV-mediated serialization pipeline (matching upstream convert.py output).
        For other categories, writes a single JSON file.

        Args:
            category (MODEL_REFERENCE_CATEGORY): The category to update.
            primary_data: The complete PRIMARY data in legacy format (grouped, no backend prefixes).
            text_generation_artifacts: Pre-computed serialization artifacts for
                text_generation. When provided, files are written directly without
                re-running the serializer.

        """
        if not self._current_repo or not self._temp_dir:
            raise RuntimeError("No repository cloned")

        if category == MODEL_REFERENCE_CATEGORY.text_generation:
            self._update_text_generation_files(primary_data, artifacts=text_generation_artifacts)
            return

        filename = str(horde_model_reference_paths.get_model_reference_filename(category))
        file_path = self._temp_dir / filename

        logger.debug(f"Updating {file_path} with PRIMARY data")

        serialized_data = json.dumps(primary_data, indent=4, sort_keys=False)
        serialized_data = serialized_data + "\n"

        file_path.write_text(serialized_data, encoding="utf-8")
        logger.debug(f"Wrote {len(primary_data)} models to {file_path}")

    def _update_text_generation_files(
        self,
        primary_data: dict[str, dict[str, Any]],
        *,
        artifacts: TextGenerationSyncArtifacts | None = None,
    ) -> None:
        """Update text_generation by producing both models.csv and db.json.

        Uses the CSV-mediated serialization pipeline to guarantee db.json
        output is byte-compatible with the upstream convert.py.

        Args:
            primary_data: The complete PRIMARY data (may include backend-prefixed entries).
            artifacts: Pre-computed serialization artifacts. When provided, these
                are written directly instead of re-running the serializer.

        """
        assert self._temp_dir is not None

        if artifacts is None:
            from horde_model_reference.sync.text_generation_serializer import TextGenerationSerializer
            from horde_model_reference.text_backend_names import has_legacy_text_backend_prefix

            base_records = {
                name: record for name, record in primary_data.items() if not has_legacy_text_backend_prefix(name)
            }

            logger.debug(f"Serializing {len(base_records)} base text generation records via CSV pipeline")

            serializer = TextGenerationSerializer()
            existing_csv_path = self._temp_dir / "models.csv"
            artifacts = serializer.serialize(
                primary_base_records=base_records,
                existing_csv_path=existing_csv_path,
            )
        else:
            logger.debug("Using pre-computed text generation serialization artifacts")

        csv_path = self._temp_dir / "models.csv"
        csv_path.write_text(artifacts.csv_content, encoding="utf-8")
        db_json_path = self._temp_dir / "db.json"
        db_json_path.write_text(artifacts.json_content, encoding="utf-8")

        logger.debug(f"Wrote models.csv and db.json for text_generation to {self._temp_dir}")

    def _commit_changes(
        self,
        category: MODEL_REFERENCE_CATEGORY,
        diff: ModelReferenceDiff,
    ) -> bool:
        """Commit the changes to the repository.

        Uses --no-gpg-sign to bypass GPG signing requirements for automated commits.
        This prevents issues when running in environments without GPG configured.

        Args:
            category (MODEL_REFERENCE_CATEGORY): The category being synced.
            diff: The diff summary for generating commit message.

        Returns:
            True if changes were committed, False if there were no changes to commit.

        """
        if not self._current_repo:
            raise RuntimeError("No repository cloned")

        self._current_repo.git.add(".")

        if not self._current_repo.is_dirty():
            logger.warning(
                f"No actual file changes for {category} despite comparator detecting "
                f"{diff.total_changes()} differences. This indicates the comparison "
                "produced false positives (e.g. due to JSON parser inconsistencies)."
            )
            return False

        commit_message = self._generate_commit_message(category, diff)
        logger.debug(f"Committing with message:\n{commit_message}")

        self._current_repo.git.commit("-m", commit_message, "--no-gpg-sign")
        logger.debug("Changes committed successfully")
        return True

    def _push_branch(self, branch_name: str) -> None:
        """Push the branch to the remote repository.

        Args:
            branch_name: The name of the branch to push.

        """
        if not self._current_repo:
            raise RuntimeError("No repository cloned?")

        try:
            repo_url_with_auth = self._get_authenticated_repo_url()

            logger.info(f"Pushing branch {branch_name}")
            self._current_repo.git.push(repo_url_with_auth, branch_name)
            logger.debug("Branch pushed successfully")
        except Exception as e:
            logger.error(f"Failed to push branch {branch_name}: {e}")
            raise

    def _get_authenticated_repo_url(self) -> str:
        """Get the repository URL with authentication token.

        Returns:
            The authenticated repository URL.

        """
        if not self._current_repo:
            raise RuntimeError("No repository cloned")

        remote_url = self._current_repo.remote("origin").url

        # Strip any existing authentication before adding new token
        clean_url = self._strip_auth_from_url(remote_url)

        hostname = urlparse(clean_url).hostname
        if hostname and hostname.lower() == "github.com":
            repo_path = self._parse_repo_name_from_url(clean_url)

            # Try GitHub App authentication first
            if self._installation_auth is not None:
                try:
                    # Use the installation auth object we stored during initialization
                    token = self._installation_auth.token
                    return f"https://x-access-token:{token}@github.com/{repo_path}.git"
                except Exception as e:
                    logger.warning(f"Failed to get GitHub App token for push: {e}")
                    # Fall through to token auth

            # Fall back to personal access token
            if self.settings.github_token:
                return f"https://{self.settings.github_token}@github.com/{repo_path}.git"

        return remote_url

    def _find_existing_sync_prs(self, repo_name: str, category: MODEL_REFERENCE_CATEGORY | None = None) -> list[Any]:
        """Find existing open PRs created by the sync service.

        Args:
            repo_name: Repository in 'owner/repo' format.
            category: Optional category to filter PRs. If None, finds all sync PRs.

        Returns:
            List of open pull request objects created by the sync service.

        """
        if not self._github_client:
            raise RuntimeError("GitHub client not initialized")

        try:
            repo = self._github_client.get_repo(repo_name)
            open_prs = repo.get_pulls(state="open", sort="created", direction="desc")

            sync_prs = []
            for pr in open_prs:
                # Check if PR is from a sync branch and matches category filter (if specified)
                if pr.head.ref.startswith("sync/") and (
                    category is None or f"sync/{category}/" in pr.head.ref or "sync/multi-category/" in pr.head.ref
                ):
                    sync_prs.append(pr)

            return sync_prs

        except GithubException as e:
            logger.warning(f"Failed to find existing sync PRs: {e}")
            return []

    def _close_existing_sync_prs(self, repo_name: str, category: MODEL_REFERENCE_CATEGORY | None = None) -> None:
        """Comment on and close every open sync PR found for the repository.

        A failure on one pull request is logged as a warning and does not stop
        the remaining pull requests from being processed.

        Args:
            repo_name: Repository in 'owner/repo' format.
            category: Optional category to filter PRs. If None, closes all sync PRs.

        """
        stale_prs = self._find_existing_sync_prs(repo_name, category)

        if not stale_prs:
            logger.debug(f"No existing sync PRs found for {repo_name}")
            return

        logger.info(f"Found {len(stale_prs)} existing sync PR(s) to close")

        # The explanation is identical for every PR, so it is built once.
        closing_note = (
            "This PR is being automatically closed because a new sync operation has been initiated.\n\n"
            "A new PR with updated changes will be created shortly."
        )

        for stale_pr in stale_prs:
            try:
                # Leave the explanation first, then close.
                stale_pr.create_issue_comment(closing_note)
                stale_pr.edit(state="closed")
                logger.info(f"Closed PR #{stale_pr.number}: {stale_pr.title}")
            except GithubException as e:
                logger.warning(f"Failed to close PR #{stale_pr.number}: {e}")

    def _create_pull_request(
        self,
        category: MODEL_REFERENCE_CATEGORY,
        diff: ModelReferenceDiff,
        repo_name: str,
        branch_name: str,
        github_settings: GithubRepoSettings,
    ) -> str:
        """Create a pull request for the sync.

        Args:
            category (MODEL_REFERENCE_CATEGORY): The category being synced.
            diff: The diff summary for generating PR description.
            repo_name: Repository in 'owner/repo' format.
            branch_name: The name of the branch to create PR from.
            github_settings: GitHub repository settings containing branch name.

        Returns:
            The URL of the created PR.

        """
        if not self._github_client:
            raise RuntimeError("GitHub client not initialized")

        # Close any existing sync PRs for this category
        self._close_existing_sync_prs(repo_name, category)

        try:
            repo = self._github_client.get_repo(repo_name)
            title = self._generate_pr_title(category)
            body = self._generate_pr_body(category, diff)

            logger.info(f"Creating PR: {title}")

            pr = repo.create_pull(
                title=title,
                body=body,
                head=branch_name,
                base=github_settings.branch,
            )

            if self.settings.pr_labels:
                pr.add_to_labels(*self.settings.pr_labels)

            if self.settings.pr_reviewers:
                try:
                    pr.create_review_request(reviewers=self.settings.pr_reviewers)
                except GithubException as e:
                    logger.warning(f"Failed to assign reviewers: {e}")

            if self.settings.pr_auto_assign_team:
                try:
                    team_slug = self.settings.pr_auto_assign_team.split("/")[-1]
                    pr.create_review_request(team_reviewers=[team_slug])
                except GithubException as e:
                    logger.warning(f"Failed to assign team: {e}")

            return pr.html_url

        except GithubException as e:
            logger.error(f"Failed to create PR: {e}")
            raise

    def _generate_commit_message(
        self,
        category: MODEL_REFERENCE_CATEGORY,
        diff: ModelReferenceDiff,
    ) -> str:
        """Generate a commit message from the diff.

        Args:
            category (MODEL_REFERENCE_CATEGORY): The category being synced.
            diff: The diff summary.

        Returns:
            The commit message.

        """
        lines = [f"Sync {category} from PRIMARY instance"]
        lines.append("")

        if diff.added_models:
            lines.append(f"Added {len(diff.added_models)} models:")
            for model_name in sorted(diff.added_models.keys())[:10]:
                lines.append(f"  + {model_name}")
            if len(diff.added_models) > 10:
                lines.append(f"  ... and {len(diff.added_models) - 10} more")

        if diff.removed_models:
            lines.append(f"\nRemoved {len(diff.removed_models)} models:")
            for model_name in sorted(diff.removed_models.keys())[:10]:
                lines.append(f"  - {model_name}")
            if len(diff.removed_models) > 10:
                lines.append(f"  ... and {len(diff.removed_models) - 10} more")

        if diff.modified_models:
            lines.append(f"\nModified {len(diff.modified_models)} models:")
            for model_name in sorted(diff.modified_models.keys())[:10]:
                lines.append(f"  ~ {model_name}")
            if len(diff.modified_models) > 10:
                lines.append(f"  ... and {len(diff.modified_models) - 10} more")

        lines.append("")
        lines.append("Generated by horde-model-reference GitHub sync service")

        return "\n".join(lines)

    def _generate_pr_title(self, category: MODEL_REFERENCE_CATEGORY) -> str:
        """Generate a PR title.

        Args:
            category (MODEL_REFERENCE_CATEGORY): The category being synced.

        Returns:
            The PR title.

        """
        date_str = datetime.now().strftime("%Y-%m-%d")
        return f"Auto Sync {category} from horde_model_reference service - {date_str}"

    def _generate_pr_body(
        self,
        category: MODEL_REFERENCE_CATEGORY,
        diff: ModelReferenceDiff,
    ) -> str:
        """Generate a PR description from the diff.

        Args:
            category (MODEL_REFERENCE_CATEGORY): The category being synced.
            diff: The diff summary.

        Returns:
            The PR body in Markdown format.

        """
        lines = [
            "## Automated Sync from horde_model_reference service",
            "",
            f"This PR synchronizes the `{category}` model references from the horde_model_reference service.",
            "",
            "### Changes Summary",
            "",
        ]

        if self.settings.primary_api_url:
            lines.append(f"**Source:** {self.settings.primary_api_url}")
            lines.append("")

        lines.append(f"- **Added:** {len(diff.added_models)} models")
        lines.append(f"- **Removed:** {len(diff.removed_models)} models")
        lines.append(f"- **Modified:** {len(diff.modified_models)} models")
        lines.append(f"- **Total Changes:** {diff.total_changes()}")
        lines.append("")

        if diff.added_models:
            lines.append("#### Added Models")
            lines.append("")
            for model_name in sorted(diff.added_models.keys())[:20]:
                lines.append(f"- `{model_name}`")
            if len(diff.added_models) > 20:
                lines.append(f"- ... and {len(diff.added_models) - 20} more")
            lines.append("")

        if diff.removed_models:
            lines.append("#### Removed Models")
            lines.append("")
            for model_name in sorted(diff.removed_models.keys())[:20]:
                lines.append(f"- `{model_name}`")
            if len(diff.removed_models) > 20:
                lines.append(f"- ... and {len(diff.removed_models) - 20} more")
            lines.append("")

        if diff.modified_models:
            lines.append("#### Modified Models")
            lines.append("")
            for model_name in sorted(diff.modified_models.keys())[:20]:
                lines.append(f"- `{model_name}`")
            if len(diff.modified_models) > 20:
                lines.append(f"- ... and {len(diff.modified_models) - 20} more")
            lines.append("")

        lines.append("---")
        lines.append("")
        lines.append("*This PR was automatically generated by the horde-model-reference GitHub sync service.*")
        lines.append("")
        lines.append(
            "Please review the changes carefully before merging. "
            "If you notice any issues, contact the horde_model_reference service administrator."
        )

        return "\n".join(lines)

    def _create_multi_category_sync_branch(self, categories: list[MODEL_REFERENCE_CATEGORY]) -> str:
        """Create a new branch for multi-category sync operation.

        Args:
            categories: The list of categories being synced.

        Returns:
            The name of the created branch.

        """
        if not self._current_repo:
            raise RuntimeError("No repository cloned")

        timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")
        branch_name = f"sync/multi-category/{timestamp}"

        logger.debug(f"Creating multi-category branch: {branch_name}")
        self._current_repo.git.checkout("-b", branch_name)

        return branch_name

    def _commit_multi_category_changes(
        self,
        categories_data: dict[
            MODEL_REFERENCE_CATEGORY,
            tuple[ModelReferenceDiff, dict[str, dict[str, Any]], TextGenerationSyncArtifacts | None],
        ],
    ) -> bool:
        """Commit changes for multiple categories.

        Uses --no-gpg-sign to bypass GPG signing requirements for automated commits.
        This prevents issues when running in environments without GPG configured.

        Args:
            categories_data: Dict mapping categories to (diff, primary_data, artifacts) tuples.

        Returns:
            True if changes were committed, False if there were no changes to commit.

        """
        if not self._current_repo:
            raise RuntimeError("No repository cloned")

        self._current_repo.git.add(".")

        if not self._current_repo.is_dirty():
            total = sum(diff.total_changes() for diff, _, _ in categories_data.values())
            logger.warning(
                f"No actual file changes despite comparator detecting {total} total "
                "differences across categories. This indicates the comparison "
                "produced false positives (e.g. due to JSON parser inconsistencies)."
            )
            return False

        commit_message = self._generate_multi_category_commit_message(categories_data)
        logger.debug(f"Committing with message:\n{commit_message}")

        self._current_repo.git.commit("-m", commit_message, "--no-gpg-sign")
        logger.debug("Changes committed successfully")
        return True

    def _generate_multi_category_commit_message(
        self,
        categories_data: dict[
            MODEL_REFERENCE_CATEGORY,
            tuple[ModelReferenceDiff, dict[str, dict[str, Any]], TextGenerationSyncArtifacts | None],
        ],
    ) -> str:
        """Generate a commit message for multi-category sync.

        Args:
            categories_data: Dict mapping categories to (diff, primary_data, artifacts) tuples.

        Returns:
            The commit message.

        """
        category_names = ", ".join(str(cat) for cat in sorted(categories_data.keys()))
        total_changes = sum(diff.total_changes() for diff, _, _ in categories_data.values())

        lines = ["Sync multiple categories from PRIMARY instance"]
        lines.append("")
        lines.append(f"Categories: {category_names}")
        lines.append(f"Total changes: {total_changes}")
        lines.append("")

        for category in sorted(categories_data.keys()):
            diff, _, _ = categories_data[category]
            lines.append(f"## {category}")

            if diff.added_models:
                lines.append(f"Added {len(diff.added_models)} models:")
                for model_name in sorted(diff.added_models.keys())[:5]:
                    lines.append(f"  + {model_name}")
                if len(diff.added_models) > 5:
                    lines.append(f"  ... and {len(diff.added_models) - 5} more")

            if diff.removed_models:
                lines.append(f"Removed {len(diff.removed_models)} models:")
                for model_name in sorted(diff.removed_models.keys())[:5]:
                    lines.append(f"  - {model_name}")
                if len(diff.removed_models) > 5:
                    lines.append(f"  ... and {len(diff.removed_models) - 5} more")

            if diff.modified_models:
                lines.append(f"Modified {len(diff.modified_models)} models:")
                for model_name in sorted(diff.modified_models.keys())[:5]:
                    lines.append(f"  ~ {model_name}")
                if len(diff.modified_models) > 5:
                    lines.append(f"  ... and {len(diff.modified_models) - 5} more")

            lines.append("")

        lines.append("Generated by horde-model-reference GitHub sync service")

        return "\n".join(lines)

    def _create_multi_category_pull_request(
        self,
        categories_data: dict[
            MODEL_REFERENCE_CATEGORY,
            tuple[ModelReferenceDiff, dict[str, dict[str, Any]], TextGenerationSyncArtifacts | None],
        ],
        repo_name: str,
        branch_name: str,
        github_settings: GithubRepoSettings,
    ) -> str:
        """Create a pull request for multi-category sync.

        Args:
            categories_data: Dict mapping categories to (diff, primary_data) tuples.
            repo_name: Repository in 'owner/repo' format.
            branch_name: The name of the branch to create PR from.
            github_settings: GitHub repository settings containing branch name.

        Returns:
            The URL of the created PR.

        """
        if not self._github_client:
            raise RuntimeError("GitHub client not initialized")

        # Close any existing sync PRs for this repository
        # For multi-category PRs, we close all sync PRs regardless of category
        self._close_existing_sync_prs(repo_name, category=None)

        try:
            repo = self._github_client.get_repo(repo_name)
            title = self._generate_multi_category_pr_title(list(categories_data.keys()))
            body = self._generate_multi_category_pr_body(categories_data)

            logger.info(f"Creating multi-category PR: {title}")

            pr = repo.create_pull(
                title=title,
                body=body,
                head=branch_name,
                base=github_settings.branch,
            )

            if self.settings.pr_labels:
                pr.add_to_labels(*self.settings.pr_labels)

            if self.settings.pr_reviewers:
                try:
                    pr.create_review_request(reviewers=self.settings.pr_reviewers)
                except GithubException as e:
                    logger.warning(f"Failed to assign reviewers: {e}")

            if self.settings.pr_auto_assign_team:
                try:
                    team_slug = self.settings.pr_auto_assign_team.split("/")[-1]
                    pr.create_review_request(team_reviewers=[team_slug])
                except GithubException as e:
                    logger.warning(f"Failed to assign team: {e}")

            return pr.html_url

        except GithubException as e:
            logger.error(f"Failed to create PR: {e}")
            raise

    def _generate_multi_category_pr_title(self, categories: list[MODEL_REFERENCE_CATEGORY]) -> str:
        """Generate a PR title for multi-category sync.

        Args:
            categories: The list of categories being synced.

        Returns:
            The PR title.

        """
        date_str = datetime.now().strftime("%Y-%m-%d")
        return f"Sync multiple categories from PRIMARY instance - {date_str}"

    def _generate_multi_category_pr_body(
        self,
        categories_data: dict[
            MODEL_REFERENCE_CATEGORY,
            tuple[ModelReferenceDiff, dict[str, dict[str, Any]], TextGenerationSyncArtifacts | None],
        ],
    ) -> str:
        """Generate a PR description for multi-category sync.

        Args:
            categories_data: Dict mapping categories to (diff, primary_data, artifacts) tuples.

        Returns:
            The PR body in Markdown format.

        """
        total_added = sum(len(diff.added_models) for diff, _, _ in categories_data.values())
        total_removed = sum(len(diff.removed_models) for diff, _, _ in categories_data.values())
        total_modified = sum(len(diff.modified_models) for diff, _, _ in categories_data.values())
        total_changes = total_added + total_removed + total_modified

        lines = [
            "## Automated Multi-Category Sync from PRIMARY Instance",
            "",
            f"This PR synchronizes **{len(categories_data)} categories** from the PRIMARY instance.",
            "",
            f"**Categories:** {', '.join(f'`{cat}`' for cat in sorted(categories_data.keys()))}",
            "",
            "### Overall Changes Summary",
            "",
        ]

        if self.settings.primary_api_url:
            lines.append(f"**Source:** {self.settings.primary_api_url}")
            lines.append("")

        lines.append(f"- **Total Added:** {total_added} models")
        lines.append(f"- **Total Removed:** {total_removed} models")
        lines.append(f"- **Total Modified:** {total_modified} models")
        lines.append(f"- **Total Changes:** {total_changes}")
        lines.append("")

        for category in sorted(categories_data.keys()):
            diff, _, _ = categories_data[category]
            lines.append(f"### {category}")
            lines.append("")
            lines.append(f"- **Added:** {len(diff.added_models)} models")
            lines.append(f"- **Removed:** {len(diff.removed_models)} models")
            lines.append(f"- **Modified:** {len(diff.modified_models)} models")
            lines.append("")

            if diff.added_models:
                lines.append("**Added Models:**")
                for model_name in sorted(diff.added_models.keys())[:10]:
                    lines.append(f"- `{model_name}`")
                if len(diff.added_models) > 10:
                    lines.append(f"- ... and {len(diff.added_models) - 10} more")
                lines.append("")

            if diff.removed_models:
                lines.append("**Removed Models:**")
                for model_name in sorted(diff.removed_models.keys())[:10]:
                    lines.append(f"- `{model_name}`")
                if len(diff.removed_models) > 10:
                    lines.append(f"- ... and {len(diff.removed_models) - 10} more")
                lines.append("")

            if diff.modified_models:
                lines.append("**Modified Models:**")
                for model_name in sorted(diff.modified_models.keys())[:10]:
                    lines.append(f"- `{model_name}`")
                if len(diff.modified_models) > 10:
                    lines.append(f"- ... and {len(diff.modified_models) - 10} more")
                lines.append("")

        lines.append("---")
        lines.append("")
        lines.append("*This PR was automatically generated by the horde-model-reference GitHub sync service.*")
        lines.append("")
        lines.append(
            "Please review the changes carefully before merging. "
            "If you notice any issues, contact the PRIMARY instance administrator."
        )

        return "\n".join(lines)

settings instance-attribute

settings = github_sync_settings

_github_client instance-attribute

_github_client: Github | None

_installation_auth instance-attribute

_installation_auth: AppInstallationAuth | None = None

_temp_dir instance-attribute

_temp_dir: Path | None = None

_current_repo instance-attribute

_current_repo: Repo | None = None

_is_persistent_dir instance-attribute

_is_persistent_dir: bool = False

_original_branch instance-attribute

_original_branch: str | None = None

__init__

__init__() -> None

Initialize the GitHub sync client.

Source code in src/horde_model_reference/sync/github_client.py
def __init__(self) -> None:
    """Initialize the GitHub sync client.

    GitHub App installation authentication is used when the app settings are
    configured; otherwise the configured GitHub token is used.

    Raises:
        RuntimeError: If neither authentication method is configured.
    """
    from horde_model_reference.sync.config import github_sync_settings

    self.settings = github_sync_settings

    self._github_client: Github | None
    self._installation_auth: Auth.AppInstallationAuth | None = None

    # GitHub App authentication takes priority over the plain token.
    app_configured = github_app_settings.is_configured()
    if not app_configured and not self.settings.github_token:
        raise RuntimeError("No GitHub authentication method configured")

    if app_configured:
        logger.info("Using GitHub App installation authentication")
        self._github_client, self._installation_auth = self._create_app_authenticated_client()
    else:
        logger.info("Using GitHub token authentication")
        self._github_client = Github(auth=Auth.Token(self.settings.github_token))

    # Working-copy state; nothing is cloned or opened during construction.
    self._temp_dir: Path | None = None
    self._current_repo: Repo | None = None
    self._is_persistent_dir: bool = False
    self._original_branch: str | None = None

_create_app_authenticated_client

_create_app_authenticated_client() -> tuple[
    Github, Auth.AppInstallationAuth
]

Create a GitHub client using App installation authentication.

Returns:

  • tuple[Github, AppInstallationAuth]

    Tuple of (Github client, installation auth object) for later token access.

Raises:

  • RuntimeError

    If GitHub App settings are not properly configured.

Source code in src/horde_model_reference/sync/github_client.py
def _create_app_authenticated_client(self) -> tuple[Github, Auth.AppInstallationAuth]:
    """Build a GitHub client backed by App installation credentials.

    Returns:
        A ``(client, installation_auth)`` pair; the auth object is returned
        alongside the client so the caller can keep it for later token access.

    Raises:
        RuntimeError: If the GitHub App settings are not fully configured, or if
            any step of building the authenticated client fails.

    """
    if not github_app_settings.is_configured():
        raise RuntimeError("GitHub App settings are not fully configured")

    try:
        key_material = github_app_settings.get_private_key_content()

        app_id = github_app_settings.github_app_id
        installation_id = github_app_settings.github_installation_id

        # Narrow the optional types; is_configured() has already vouched for them.
        assert app_id is not None
        assert installation_id is not None

        application_auth = Auth.AppAuth(
            app_id=app_id,
            private_key=key_material,
        )

        # No token_permissions are passed, so the installation's permissions are not narrowed here.
        installation_auth = application_auth.get_installation_auth(
            installation_id=installation_id,
        )

        logger.debug(
            f"Created GitHub App installation auth for app_id={app_id}, "
            f"installation_id={installation_id}"
        )

        return Github(auth=installation_auth), installation_auth

    except Exception as e:
        # Every failure in the block above is reported uniformly as a RuntimeError.
        logger.error(f"Failed to create GitHub App authenticated client: {e}")
        raise RuntimeError(f"GitHub App authentication failed: {e}") from e

__enter__

__enter__() -> GitHubSyncClient

Context manager entry - returns the client itself; no temporary directory is created at this point.

Source code in src/horde_model_reference/sync/github_client.py
def __enter__(self) -> GitHubSyncClient:
    """Context manager entry - returns this client unchanged.

    No directory is created and no repository is opened here; entering the
    context only hands back ``self`` so it can be bound with ``as``.
    """
    return self

__exit__

__exit__(
    exc_type: type[BaseException] | None,
    exc_val: BaseException | None,
    exc_tb: TracebackType | None,
) -> None

Context manager exit - cleans up temporary directory.

Source code in src/horde_model_reference/sync/github_client.py
def __exit__(
    self,
    exc_type: type[BaseException] | None,
    exc_val: BaseException | None,
    exc_tb: TracebackType | None,
) -> None:
    """Context manager exit - releases resources by delegating to ``cleanup()``.

    The exception arguments are ignored and ``None`` is returned, so an
    exception raised inside the ``with`` body is not suppressed.
    """
    self.cleanup()

cleanup

cleanup() -> None

Clean up resources, preserving persistent directories.

SIGNIFICANCE: - Persistent target directories must NOT be deleted (reused across runs) - Only temp directories (self._is_persistent_dir == False) should be removed

Source code in src/horde_model_reference/sync/github_client.py
def cleanup(self) -> None:
    """Release the open repository handle and discard temporary clones.

    SIGNIFICANCE:
    - A persistent clone (``self._is_persistent_dir`` is True) is left on disk for reuse
    - Only a temporary clone directory is deleted, and deletion failures are logged, not raised
    - The working-state attributes are reset afterwards in either case
    """
    repo = self._current_repo
    if repo:
        repo.close()
        self._current_repo = None

    workdir = self._temp_dir
    if workdir and workdir.exists():
        if not self._is_persistent_dir:
            logger.debug(f"Cleaning up temporary directory: {workdir}")
            try:
                shutil.rmtree(workdir)
            except Exception as e:
                # Best effort: a leftover temp directory must not fail the sync.
                logger.warning(f"Failed to clean up temporary directory {workdir}: {e}")
        else:
            logger.debug(f"Preserving persistent clone at {workdir}")

    # Return to the pristine "nothing checked out" state.
    self._temp_dir = None
    self._is_persistent_dir = False
    self._original_branch = None

_branch_operation

_branch_operation() -> Generator[None]

Context manager to ensure repository is returned to original branch.

Captures the current branch before operations and restores it afterwards, even if an exception occurs. This prevents leaving repositories in a detached or temporary branch state.

Usage

with self._branch_operation(): # Create and work on temporary branch # Push changes and create PR # Branch will be restored automatically

Source code in src/horde_model_reference/sync/github_client.py
@contextmanager
def _branch_operation(self) -> Generator[None]:
    """Context manager to ensure repository is returned to original branch.

    Captures the current branch before operations and restores it afterwards,
    even if an exception occurs. This prevents leaving repositories in a
    detached or temporary branch state.

    Usage:
        with self._branch_operation():
            # Create and work on temporary branch
            # Push changes and create PR
            # Branch will be restored automatically
    """
    if not self._current_repo:
        raise RuntimeError("No repository available for branch operation")

    try:
        self._original_branch = self._current_repo.active_branch.name
        logger.debug(f"Saved original branch: {self._original_branch}")
    except Exception as e:
        raise RuntimeError(f"Failed to determine current branch: {e}") from e

    try:
        yield
    finally:
        if self._current_repo and self._original_branch:
            try:
                current_branch = self._current_repo.active_branch.name
                if current_branch != self._original_branch:
                    logger.debug(f"Restoring original branch: {self._original_branch}")
                    self._current_repo.git.checkout(self._original_branch)
            except Exception as e:
                logger.warning(f"Failed to restore original branch {self._original_branch}: {e}")

sync_category_to_github

sync_category_to_github(
    *,
    category: MODEL_REFERENCE_CATEGORY,
    diff: ModelReferenceDiff,
    primary_data: dict[str, dict[str, Any]],
    text_generation_artifacts: TextGenerationSyncArtifacts
    | None = None,
) -> str | None

Sync a category's model references to GitHub by creating a PR.

Parameters:

  • category (MODEL_REFERENCE_CATEGORY) –

    The category to sync.

  • diff (ModelReferenceDiff) –

    The detected differences for this category.

  • primary_data (dict[str, dict[str, Any]]) –

    The complete PRIMARY data for this category (legacy format).

  • text_generation_artifacts (TextGenerationSyncArtifacts | None, default: None ) –

    Pre-computed serialization artifacts for text_generation. When provided, _update_text_generation_files writes these directly instead of re-running the serializer.

Returns:

  • str | None

    The PR URL if created, None if no PR was needed or dry run.

Source code in src/horde_model_reference/sync/github_client.py
def sync_category_to_github(
    self,
    *,
    category: MODEL_REFERENCE_CATEGORY,
    diff: ModelReferenceDiff,
    primary_data: dict[str, dict[str, Any]],
    text_generation_artifacts: TextGenerationSyncArtifacts | None = None,
) -> str | None:
    """Open a pull request that brings one category's GitHub file in line with PRIMARY.

    Args:
        category (MODEL_REFERENCE_CATEGORY): The category to sync.
        diff: The detected differences for this category.
        primary_data: The complete PRIMARY data for this category (legacy format).
        text_generation_artifacts: Pre-computed serialization artifacts for
            text_generation, forwarded unchanged to ``_update_category_file``.

    Returns:
        The PR URL if one was created. None when the diff is empty, the change
        count is below the configured threshold, dry-run mode is on, or the
        commit step reports that no file actually changed.

    """
    # --- Cheap exits that need no repository at all ---
    if not diff.has_changes():
        logger.info(f"No changes detected for {category}, skipping PR creation")
        return None

    if diff.total_changes() < self.settings.min_changes_threshold:
        logger.info(
            f"Only {diff.total_changes()} changes for {category} "
            f"(threshold: {self.settings.min_changes_threshold}), skipping PR"
        )
        return None

    if self.settings.dry_run:
        logger.info(f"[DRY RUN] Would create PR for {category} with {diff.total_changes()} changes")
        logger.info(f"[DRY RUN] Summary:\n{diff.summary()}")
        return None

    # --- Real sync: clone, branch, write, commit, push, open PR ---
    try:
        repo_settings = horde_model_reference_settings.get_repo_by_category(category)
        repo_slug = repo_settings.repo_owner_and_name
        logger.info(f"Starting sync for {category} to {repo_slug}")

        self._clone_repository(repo_settings)

        with self._branch_operation():
            sync_branch = self._create_sync_branch(category)
            self._update_category_file(
                category,
                primary_data,
                text_generation_artifacts=text_generation_artifacts,
            )
            committed = self._commit_changes(category, diff)
            if not committed:
                logger.warning(
                    f"Skipping PR creation for {category}: comparator detected "
                    f"{diff.total_changes()} changes but no actual file changes were produced."
                )
                return None
            self._push_branch(sync_branch)
            pr_url = self._create_pull_request(category, diff, repo_slug, sync_branch, repo_settings)

        logger.success(f"Successfully created PR for {category}: {pr_url}")
        return pr_url

    except Exception as e:
        logger.error(f"Failed to sync {category} to GitHub: {e}")
        raise
    finally:
        # Runs on success, early return and failure alike.
        self.cleanup()

sync_multiple_categories_to_github

sync_multiple_categories_to_github(
    *,
    repo_name: str,
    categories_data: dict[
        MODEL_REFERENCE_CATEGORY,
        tuple[
            ModelReferenceDiff,
            dict[str, dict[str, Any]],
            TextGenerationSyncArtifacts | None,
        ],
    ],
) -> str | None

Sync multiple categories to GitHub in a single PR.

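Parameters:

  • repo_name (str) –

    Repository in 'owner/repo' format.

  • categories_data (dict[MODEL_REFERENCE_CATEGORY, tuple[ModelReferenceDiff, dict[str, dict[str, Any]], TextGenerationSyncArtifacts | None]]) –

    Dict mapping categories to (diff, primary_data, artifacts) tuples.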

Returns:

  • str | None

    The PR URL if created, None if no PR was needed or dry run.

Source code in src/horde_model_reference/sync/github_client.py
def sync_multiple_categories_to_github(
    self,
    *,
    repo_name: str,
    categories_data: dict[
        MODEL_REFERENCE_CATEGORY,
        tuple[ModelReferenceDiff, dict[str, dict[str, Any]], TextGenerationSyncArtifacts | None],
    ],
) -> str | None:
    """Sync multiple categories to GitHub in a single PR.

    Args:
        repo_name: Repository in 'owner/repo' format. Used for log messages and
            passed to the pull-request creation step.
        categories_data: Dict mapping categories to (diff, primary_data, artifacts) tuples.

    Returns:
        The PR URL if created. None when ``categories_data`` is empty, no diff
        reports changes, the combined change count is below the configured
        threshold, dry-run mode is on, or the commit step reports that no file
        actually changed.

    """
    diffs = [diff for diff, _, _ in categories_data.values()]

    # Mirror the single-category guard. Without it, an empty mapping (or one with
    # only empty diffs) could slip past a threshold of 0 and fail later on
    # `next(iter(...))` or trigger a pointless clone.
    if not any(diff.has_changes() for diff in diffs):
        logger.info("No changes detected in any category, skipping PR creation")
        return None

    total_changes = sum(diff.total_changes() for diff in diffs)

    if total_changes < self.settings.min_changes_threshold:
        logger.info(
            f"Only {total_changes} total changes across categories "
            f"(threshold: {self.settings.min_changes_threshold}), skipping PR"
        )
        return None

    if self.settings.dry_run:
        logger.info(
            f"[DRY RUN] Would create PR for {len(categories_data)} categories with {total_changes} changes"
        )
        for category, (diff, _, _) in categories_data.items():
            logger.info(f"[DRY RUN] {category}:\n{diff.summary()}")
        return None

    try:
        logger.info(f"Starting multi-category sync to {repo_name}")

        # The repository to clone is resolved from the first category only.
        # NOTE(review): nothing here checks that it matches `repo_name` or that
        # every category maps to the same repository - confirm the caller guarantees this.
        first_category = next(iter(categories_data))
        github_repo_settings = horde_model_reference_settings.get_repo_by_category(first_category)

        self._clone_repository(github_repo_settings)

        with self._branch_operation():
            branch_name = self._create_multi_category_sync_branch(list(categories_data.keys()))

            for category, (diff, primary_data, artifacts) in categories_data.items():
                logger.info(f"Updating {category} with {diff.total_changes()} changes")
                self._update_category_file(
                    category,
                    primary_data,
                    text_generation_artifacts=artifacts,
                )

            has_changes = self._commit_multi_category_changes(categories_data)
            if not has_changes:
                logger.warning(
                    f"Skipping PR creation for multi-category sync: comparator detected "
                    f"{total_changes} changes but no actual file changes were produced."
                )
                return None
            self._push_branch(branch_name)
            pr_url = self._create_multi_category_pull_request(
                categories_data, repo_name, branch_name, github_repo_settings
            )

        logger.success(f"Successfully created multi-category PR: {pr_url}")
        return pr_url

    except Exception as e:
        logger.error(f"Failed to sync multiple categories to GitHub: {e}")
        raise
    finally:
        # Runs on success, early return and failure alike.
        self.cleanup()

_get_target_dir_for_repo

_get_target_dir_for_repo(
    github_settings: GithubRepoSettings,
) -> Path | None

Compute per-repository directory path within target_clone_dir.

Parameters:

  • github_settings (GithubRepoSettings) –

    GitHub repository settings containing owner/name.

Returns:

  • Path | None

    Path to {target_clone_dir}/{owner}/{repo}/ or None if target_clone_dir not configured.

Example

github_settings.owner = "Haidra-Org" github_settings.name = "AI-Horde-image-model-reference" target_clone_dir = "/path/to/clones" Returns: Path("/path/to/clones/Haidra-Org/AI-Horde-image-model-reference")

Source code in src/horde_model_reference/sync/github_client.py
def _get_target_dir_for_repo(self, github_settings: GithubRepoSettings) -> Path | None:
    """Compute per-repository directory path within target_clone_dir.

    Args:
        github_settings: GitHub repository settings containing owner/name.

    Returns:
        Path to {target_clone_dir}/{owner}/{repo}/ or None if target_clone_dir not configured.

    Example:
        github_settings.owner = "Haidra-Org"
        github_settings.name = "AI-Horde-image-model-reference"
        target_clone_dir = "/path/to/clones"
        Returns: Path("/path/to/clones/Haidra-Org/AI-Horde-image-model-reference")

    """
    if not self.settings.target_clone_dir:
        return None

    base_dir = Path(self.settings.target_clone_dir)
    return base_dir / github_settings.repo_owner_and_name

_verify_existing_repo

_verify_existing_repo(
    repo_path: Path,
    expected_github_settings: GithubRepoSettings,
) -> None

Verify existing directory matches expected repository identity.

Extracts the repository owner/name from the origin remote URL and compares it with the expected value. Ignores authentication information in URLs when comparing. The checked-out branch is not compared.

Parameters:

  • repo_path (Path) –

    Path to existing git repository.

  • expected_github_settings (GithubRepoSettings) –

    Expected GitHub repository settings.

Raises:

  • RuntimeError

    If directory is not a git repository.

  • ValueError

    If owner/repo doesn't match the expected value.

Source code in src/horde_model_reference/sync/github_client.py
def _verify_existing_repo(self, repo_path: Path, expected_github_settings: GithubRepoSettings) -> None:
    """Verify existing directory matches expected repository identity.

    Extracts and compares repository owner/name from git remote URL and current branch.
    Ignores authentication information in URLs when comparing.

    Args:
        repo_path: Path to existing git repository.
        expected_github_settings: Expected GitHub repository settings.

    Raises:
        RuntimeError: If directory is not a git repository.
        ValueError: If owner/repo or branch doesn't match expected values.

    """
    git_dir = repo_path / ".git"
    if not git_dir.exists():
        raise RuntimeError(f"Target directory exists but is not a git repository: {repo_path}")

    try:
        repo = Repo(repo_path)
    except Exception as e:
        raise RuntimeError(f"Failed to open git repository at {repo_path}: {e}") from e

    try:
        remote_url = repo.remote("origin").url
    except Exception as e:
        raise RuntimeError(f"Failed to get remote URL from repository at {repo_path}: {e}") from e

    # Strip any authentication information before parsing
    clean_url = self._strip_auth_from_url(remote_url)
    actual_repo_name = self._parse_repo_name_from_url(clean_url)
    expected_repo_name = expected_github_settings.repo_owner_and_name

    if actual_repo_name != expected_repo_name:
        raise ValueError(
            f"Repository mismatch - Expected: {expected_repo_name}, Found: {actual_repo_name}. "
            f"Aborting to prevent data corruption."
        )

    logger.info(f"✓ Repository: {actual_repo_name} (matches expected)")

_strip_auth_from_url

_strip_auth_from_url(url: str) -> str

Strip authentication information from a git remote URL.

Parameters:

  • url (str) –

    Git remote URL that may contain authentication credentials.

Returns:

  • str

    URL with authentication information removed.

Example

"https://token@github.com/owner/repo.git" -> "https://github.com/owner/repo.git" "https://user:pass@github.com/owner/repo.git" -> "https://github.com/owner/repo.git"

Source code in src/horde_model_reference/sync/github_client.py
def _strip_auth_from_url(self, url: str) -> str:
    """Strip authentication information from a git remote URL.

    Args:
        url: Git remote URL that may contain authentication credentials.

    Returns:
        URL with authentication information removed.

    Example:
        "https://token@github.com/owner/repo.git" -> "https://github.com/owner/repo.git"
        "https://user:pass@github.com/owner/repo.git" -> "https://github.com/owner/repo.git"

    """
    parsed = urlparse(url)
    # Handle HTTPS URLs: strip credentials if host is github.com
    if parsed.scheme in ("http", "https") and parsed.hostname and parsed.hostname.lower() == "github.com":
        # Rebuild URL without username/password
        path = parsed.path or ""
        query = f"?{parsed.query}" if parsed.query else ""
        fragment = f"#{parsed.fragment}" if parsed.fragment else ""
        return f"{parsed.scheme}://github.com{path}{query}{fragment}"

    # Handle SSH URL format: git@github.com:owner/repo.git
    if url.startswith("git@github.com:"):
        # Already no credentials other than 'git'
        return url
    return url

_parse_repo_name_from_url

_parse_repo_name_from_url(url: str) -> str

Parse owner/repo from a git remote URL.

Parameters:

  • url (str) –

    Git remote URL (https:// or git@ format), should be cleaned of auth first.

Returns:

  • str

    Repository name in 'owner/repo' format.

Example

"https://github.com/Haidra-Org/AI-Horde-image-model-reference.git" -> "Haidra-Org/AI-Horde-image-model-reference"

Source code in src/horde_model_reference/sync/github_client.py
def _parse_repo_name_from_url(self, url: str) -> str:
    """Parse owner/repo from a git remote URL.

    Args:
        url: Git remote URL (https:// or git@ format), should be cleaned of auth first.

    Returns:
        Repository name in 'owner/repo' format.

    Example:
        "https://github.com/Haidra-Org/AI-Horde-image-model-reference.git"
        -> "Haidra-Org/AI-Horde-image-model-reference"

    """
    # HTTPS clone URL: https://github.com/owner/repo.git
    if url.startswith("https://"):
        parsed = urlparse(url)
        if parsed.hostname != "github.com":
            raise ValueError(f"URL hostname must be github.com, got: {parsed.hostname}")
        repo_path = parsed.path.replace(".git", "").strip("/")
        if "/" not in repo_path:
            raise ValueError(f"URL path '{repo_path}' does not match owner/repo format")
        return repo_path

    # SSH clone URL: git@github.com:owner/repo.git
    if url.startswith("git@github.com:"):
        repo_path = url[len("git@github.com:") :].replace(".git", "").strip("/")
        if "/" not in repo_path:
            raise ValueError(f"URL path '{repo_path}' does not match owner/repo format")
        return repo_path

    # Also handle "ssh://git@github.com/owner/repo.git"
    if url.startswith("ssh://"):
        parsed = urlparse(url)
        if parsed.hostname != "github.com":
            raise ValueError(f"URL hostname must be github.com, got: {parsed.hostname}")
        repo_path = parsed.path.replace(".git", "").strip("/")
        if repo_path.startswith("~"):
            repo_path = repo_path.lstrip("~/")
        if "/" not in repo_path:
            raise ValueError(f"URL path '{repo_path}' does not match owner/repo format")
        return repo_path

    raise ValueError(f"Unable to parse repository name from URL: {url}")

_check_for_local_changes

_check_for_local_changes(repo: Repo) -> bool

Check if repository has uncommitted changes.

Parameters:

  • repo (Repo) –

    GitPython Repo object.

Returns:

  • bool

    True if uncommitted changes or untracked files exist.

Source code in src/horde_model_reference/sync/github_client.py
def _check_for_local_changes(self, repo: Repo) -> bool:
    """Check if repository has uncommitted changes.

    Thin wrapper over ``repo.is_dirty`` with ``untracked_files=True``, so
    untracked files are taken into account as well.

    Args:
        repo: GitPython Repo object.

    Returns:
        True if uncommitted changes or untracked files exist.

    """
    return repo.is_dirty(untracked_files=True)

_reset_existing_repo

_reset_existing_repo(
    repo: Repo, github_settings: GithubRepoSettings
) -> None

Reset existing repository to match remote branch state.

Sequence: 1. Fetch latest from origin 2. Check for local changes 3. If changes exist, prompt user to continue or abort 4. Hard reset to origin/{branch} 5. Clean untracked files

Parameters:

  • repo (Repo) –

    GitPython Repo object for repository to reset.

  • github_settings (GithubRepoSettings) –

    GitHub repository settings containing branch name.

Raises:

  • RuntimeError

    If user chooses to abort or on git operation failure.

Source code in src/horde_model_reference/sync/github_client.py
def _reset_existing_repo(self, repo: Repo, github_settings: GithubRepoSettings) -> None:
    """Reset existing repository to match remote branch state.

    Sequence:
    1. Fetch latest from origin
    2. Check for local changes
    3. If changes exist, list them and prompt on stdin to continue or abort
    4. Hard reset to origin/{branch}
    5. Clean untracked files (``git clean -fdx``, which also removes ignored files)

    Args:
        repo: GitPython Repo object for repository to reset.
        github_settings: GitHub repository settings containing branch name.

    Raises:
        RuntimeError: If the fetch fails, the user declines (or no input is
            available), or the reset/clean step fails.

    """
    logger.info("Fetching latest changes from origin...")
    try:
        repo.remotes.origin.fetch()
    except Exception as e:
        raise RuntimeError(f"Failed to fetch from origin: {e}") from e

    if self._check_for_local_changes(repo):
        logger.warning(f"Local changes detected in {repo.working_dir}")

        # Collect into a set: a file that is both staged and modified in the
        # working tree shows up in both diffs and must be counted only once.
        changed: set[str] = set()
        if repo.is_dirty():
            changed.update(item.a_path for item in repo.index.diff(None) if item.a_path)
            changed.update(item.a_path for item in repo.index.diff("HEAD") if item.a_path)
        untracked = repo.untracked_files  # read once and reuse
        if untracked:
            changed.update(untracked)

        if changed:
            max_listed = 10
            unique_files = sorted(changed)
            logger.warning("Changed files:")
            for file_path in unique_files[:max_listed]:
                logger.warning(f"  {file_path}")
            # The overflow count is based on the same de-duplicated list that is displayed.
            if len(unique_files) > max_listed:
                logger.warning(f"  ... and {len(unique_files) - max_listed} more")

        try:
            response = input("Continue and discard all local changes? (y/n): ").strip().lower()
            if response != "y":
                raise RuntimeError("Aborting to preserve local changes")
        except EOFError as e:
            # No stdin available (e.g. non-interactive run): refuse rather than discard work.
            raise RuntimeError("Aborting: no user input available") from e

    target_ref = f"origin/{github_settings.branch}"
    logger.info(f"Resetting to {target_ref}...")

    try:
        repo.head.reset(target_ref, index=True, working_tree=True)
        repo.git.clean("-fdx")
        logger.info(f"✓ Reset to {target_ref} and cleaned untracked files")
    except Exception as e:
        raise RuntimeError(f"Failed to reset repository: {e}") from e

_clone_repository

_clone_repository(
    github_settings: GithubRepoSettings,
) -> None

Clone or reuse GitHub repository.

SIGNIFICANCE: - Uses HordeModelReferenceSettings as single source of truth for GitHub URLs - github_settings contains owner, name, branch, and constructs all URLs - Multiple categories may map to same repo (persistence is per-repo, not per-category)

Flow: 1. Determine target directory via _get_target_dir_for_repo(github_settings) 2. If no persistent dir configured: use temp directory logic 3. If persistent dir exists: verify identity and reset 4. If persistent dir doesn't exist: clone to persistent dir

Parameters:

  • github_settings (GithubRepoSettings) –

    GitHub repository settings from HordeModelReferenceSettings.

Source code in src/horde_model_reference/sync/github_client.py
def _clone_repository(self, github_settings: GithubRepoSettings) -> None:
    """Clone or reuse GitHub repository.

    On success ``self._current_repo``, ``self._temp_dir`` and
    ``self._is_persistent_dir`` describe the working clone.

    SIGNIFICANCE:
    - Uses HordeModelReferenceSettings as single source of truth for GitHub URLs
    - github_settings contains owner, name, branch, and constructs all URLs
    - Multiple categories may map to same repo (persistence is per-repo, not per-category)

    Flow:
    1. Determine target directory via _get_target_dir_for_repo(github_settings)
    2. If no persistent dir configured: use temp directory logic
    3. If persistent dir exists: verify identity and reset
    4. If persistent dir doesn't exist: clone to persistent dir

    Args:
        github_settings: GitHub repository settings from HordeModelReferenceSettings.

    Raises:
        RuntimeError: If an existing persistent clone cannot be opened.
        Exception: Clone failures are logged and re-raised unchanged; errors from
            ``_verify_existing_repo`` and ``_reset_existing_repo`` also propagate.

    """
    target_dir = self._get_target_dir_for_repo(github_settings)
    repo_name = github_settings.repo_owner_and_name

    if target_dir is None:
        # Case 1: no persistent clone directory configured -> throwaway shallow clone.
        if self.settings.sync_temp_dir:
            temp_base = Path(self.settings.sync_temp_dir)
            temp_base.mkdir(parents=True, exist_ok=True)
            self._temp_dir = Path(tempfile.mkdtemp(dir=temp_base))
        else:
            self._temp_dir = Path(tempfile.mkdtemp())

        # _temp_dir is recorded before cloning, so cleanup() can remove the
        # directory even when the clone below fails.
        logger.debug(f"Created temporary directory: {self._temp_dir}")
        self._is_persistent_dir = False

        repo_url = github_settings.git_clone_url
        logger.info(f"Cloning {repo_url} to {self._temp_dir}")

        try:
            # depth=1: only the tip of the target branch is fetched for temporary clones.
            self._current_repo = Repo.clone_from(
                url=repo_url,
                to_path=self._temp_dir,
                branch=github_settings.branch,
                depth=1,
            )
            logger.debug(f"Successfully cloned {repo_name}")
        except Exception as e:
            logger.error(f"Failed to clone repository {repo_name}: {e}")
            raise

    elif target_dir.exists():
        # Case 2: a persistent clone already exists -> verify it, then reset it to the remote state.
        logger.info(f"Found existing clone at {target_dir}, verifying identity...")

        self._verify_existing_repo(target_dir, github_settings)

        try:
            self._current_repo = Repo(target_dir)
        except Exception as e:
            raise RuntimeError(f"Failed to open existing repository at {target_dir}: {e}") from e

        logger.info(f"Verified repository identity: {repo_name} (branch: {github_settings.branch})")

        self._reset_existing_repo(self._current_repo, github_settings)

        # Recorded only after a successful reset; marked persistent so cleanup() keeps the directory.
        self._temp_dir = target_dir
        self._is_persistent_dir = True

    else:
        # Case 3: persistent directory configured but not present yet -> full clone into it.
        target_dir.parent.mkdir(parents=True, exist_ok=True)

        repo_url = github_settings.git_clone_url
        logger.info(f"Cloning {repo_url} to {target_dir}...")

        try:
            # No depth limit here, unlike the temporary clone above.
            self._current_repo = Repo.clone_from(
                url=repo_url,
                to_path=target_dir,
                branch=github_settings.branch,
            )
            logger.info(f"Successfully cloned {repo_name} (branch: {github_settings.branch})")
        except Exception as e:
            logger.error(f"Failed to clone repository {repo_name}: {e}")
            raise

        # Marked persistent so cleanup() keeps the directory for later runs.
        self._temp_dir = target_dir
        self._is_persistent_dir = True

_create_sync_branch

_create_sync_branch(
    category: MODEL_REFERENCE_CATEGORY,
) -> str

Create a new branch for the sync operation.

Parameters:

  • category (MODEL_REFERENCE_CATEGORY) –

    The category being synced.

Returns:

  • str

    The name of the created branch.

Source code in src/horde_model_reference/sync/github_client.py
def _create_sync_branch(self, category: MODEL_REFERENCE_CATEGORY) -> str:
    """Create a new branch for the sync operation.

    Args:
        category (MODEL_REFERENCE_CATEGORY): The category being synced.

    Returns:
        The name of the created branch.

    """
    if not self._current_repo:
        raise RuntimeError("No repository cloned")

    timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")
    branch_name = f"sync/{category}/{timestamp}"

    logger.debug(f"Creating branch: {branch_name}")
    self._current_repo.git.checkout("-b", branch_name)

    return branch_name

_generate_backend_prefixes_for_github

_generate_backend_prefixes_for_github(
    grouped_data: dict[str, dict[str, Any]],
) -> dict[str, dict[str, Any]]

Generate backend prefix duplicates for GitHub sync (legacy format compatibility).

This replicates the logic from scripts/legacy_text/convert.py lines 85-87. For each base model, creates 3 entries: 1. Base entry (e.g., "llama-2-7b") 2. Aphrodite prefixed (e.g., "aphrodite/llama-2-7b") 3. KoboldCPP prefixed (e.g., "koboldcpp/llama-2-7b")

This is ONLY used for GitHub sync to maintain backward compatibility with the legacy GitHub JSON format. Internally, we store grouped data (CSV).

Parameters:

  • grouped_data (dict[str, dict[str, Any]]) –

    The grouped model data (one entry per base model).

Returns:

  • dict[str, dict[str, Any]]

    dict[str, dict[str, Any]]: Model data with backend prefix duplicates.

Source code in src/horde_model_reference/sync/github_client.py
def _generate_backend_prefixes_for_github(
    self,
    grouped_data: dict[str, dict[str, Any]],
) -> dict[str, dict[str, Any]]:
    """Generate backend prefix duplicates for GitHub sync (legacy format compatibility).

    For each base model, creates 3 entries:
    1. Base entry keyed by the record's key (e.g., "llama-2-7b")
    2. Aphrodite prefix + the record's key (e.g., "aphrodite/llama-2-7b")
    3. KoboldCPP prefix + the record's ``model_name`` field, falling back to
       the record's key when that field is absent (e.g., "koboldcpp/llama-2-7b")

    Each entry is a shallow copy of the source record with its ``name`` field
    set to the entry's key. Prefixes are joined by plain concatenation, so they
    are never interpreted as format strings.

    This is ONLY used for GitHub sync to maintain backward compatibility with
    the legacy GitHub JSON format. Internally, we store grouped data (CSV).

    Args:
        grouped_data: The grouped model data (one entry per base model).

    Returns:
        dict[str, dict[str, Any]]: Model data with backend prefix duplicates.

    """
    from horde_model_reference.meta_consts import TEXT_BACKENDS
    from horde_model_reference.text_backend_names import TEXT_LEGACY_BACKEND_PREFIXES

    # Loop-invariant lookups, resolved once.
    aphrodite_prefix = TEXT_LEGACY_BACKEND_PREFIXES[TEXT_BACKENDS.aphrodite]
    koboldcpp_prefix = TEXT_LEGACY_BACKEND_PREFIXES[TEXT_BACKENDS.koboldcpp]

    result: dict[str, dict[str, Any]] = {}

    for name, record in grouped_data.items():
        model_name = record.get("model_name", name)

        # The koboldcpp key is built from model_name while the others use the record key.
        keys = (
            name,
            f"{aphrodite_prefix}{name}",
            f"{koboldcpp_prefix}{model_name}",
        )

        for key in keys:
            # Shallow copy with the name field pointing at this entry's own key.
            record_copy = record.copy()
            record_copy["name"] = key
            result[key] = record_copy

    logger.debug(
        f"Generated {len(result)} total records from {len(grouped_data)} base models "
        "(including backend prefix duplicates for GitHub)"
    )
    return result

_update_category_file

_update_category_file(
    category: MODEL_REFERENCE_CATEGORY,
    primary_data: dict[str, dict[str, Any]],
    *,
    text_generation_artifacts: TextGenerationSyncArtifacts
    | None = None,
) -> None

Update the category file(s) with PRIMARY data.

For text_generation, produces both models.csv and db.json via the CSV-mediated serialization pipeline (matching upstream convert.py output). For other categories, writes a single JSON file.

Parameters:

  • category (MODEL_REFERENCE_CATEGORY) –

    The category to update.

  • primary_data (dict[str, dict[str, Any]]) –

    The complete PRIMARY data in legacy format (grouped, no backend prefixes).

  • text_generation_artifacts (TextGenerationSyncArtifacts | None, default: None ) –

    Pre-computed serialization artifacts for text_generation. When provided, files are written directly without re-running the serializer.

Source code in src/horde_model_reference/sync/github_client.py
def _update_category_file(
    self,
    category: MODEL_REFERENCE_CATEGORY,
    primary_data: dict[str, dict[str, Any]],
    *,
    text_generation_artifacts: TextGenerationSyncArtifacts | None = None,
) -> None:
    """Write the PRIMARY data for one category into the cloned repository.

    The text_generation category is handed off to
    ``_update_text_generation_files`` (which writes models.csv and db.json).
    Every other category is dumped as one JSON file: 4-space indent, keys in
    insertion order, terminated by a single trailing newline.

    Args:
        category (MODEL_REFERENCE_CATEGORY): The category to update.
        primary_data: The complete PRIMARY data in legacy format (grouped, no backend prefixes).
        text_generation_artifacts: Pre-computed serialization artifacts for
            text_generation. Forwarded unchanged to the text_generation writer;
            ignored for every other category.

    Raises:
        RuntimeError: If no repository has been cloned or no working directory is set.

    """
    if not (self._current_repo and self._temp_dir):
        raise RuntimeError("No repository cloned")

    # text_generation has its own two-file output and never reaches the JSON dump below.
    if category == MODEL_REFERENCE_CATEGORY.text_generation:
        self._update_text_generation_files(primary_data, artifacts=text_generation_artifacts)
        return

    target_name = str(horde_model_reference_paths.get_model_reference_filename(category))
    target_path = self._temp_dir / target_name
    logger.debug(f"Updating {target_path} with PRIMARY data")

    payload = json.dumps(primary_data, indent=4, sort_keys=False) + "\n"
    target_path.write_text(payload, encoding="utf-8")

    logger.debug(f"Wrote {len(primary_data)} models to {target_path}")

_update_text_generation_files

_update_text_generation_files(
    primary_data: dict[str, dict[str, Any]],
    *,
    artifacts: TextGenerationSyncArtifacts | None = None,
) -> None

Update text_generation by producing both models.csv and db.json.

Uses the CSV-mediated serialization pipeline to guarantee db.json output is byte-compatible with the upstream convert.py.

Parameters:

  • primary_data (dict[str, dict[str, Any]]) –

    The complete PRIMARY data (may include backend-prefixed entries).

  • artifacts (TextGenerationSyncArtifacts | None, default: None ) –

    Pre-computed serialization artifacts. When provided, these are written directly instead of re-running the serializer.

Source code in src/horde_model_reference/sync/github_client.py
def _update_text_generation_files(
    self,
    primary_data: dict[str, dict[str, Any]],
    *,
    artifacts: TextGenerationSyncArtifacts | None = None,
) -> None:
    """Update text_generation by producing both models.csv and db.json.

    When ``artifacts`` is not supplied, the backend-prefixed entries are
    filtered out of ``primary_data`` and the remaining base records are run
    through ``TextGenerationSerializer`` (using the repository's current
    models.csv as the existing-CSV input). The resulting CSV and JSON text
    is then written into the working directory.

    Args:
        primary_data: The complete PRIMARY data (may include backend-prefixed entries).
        artifacts: Pre-computed serialization artifacts. When provided, these
            are written directly instead of re-running the serializer.

    Raises:
        RuntimeError: If no working directory is available (no repository cloned).

    """
    # Explicit check rather than `assert`: assertions vanish under `python -O`,
    # and sibling methods report this state with the same RuntimeError.
    if self._temp_dir is None:
        raise RuntimeError("No repository cloned")

    csv_path = self._temp_dir / "models.csv"
    db_json_path = self._temp_dir / "db.json"

    if artifacts is None:
        # Imported lazily so the serializer is only loaded when it is actually needed.
        from horde_model_reference.sync.text_generation_serializer import TextGenerationSerializer
        from horde_model_reference.text_backend_names import has_legacy_text_backend_prefix

        base_records = {
            name: record for name, record in primary_data.items() if not has_legacy_text_backend_prefix(name)
        }

        logger.debug(f"Serializing {len(base_records)} base text generation records via CSV pipeline")

        serializer = TextGenerationSerializer()
        artifacts = serializer.serialize(
            primary_base_records=base_records,
            existing_csv_path=csv_path,
        )
    else:
        logger.debug("Using pre-computed text generation serialization artifacts")

    csv_path.write_text(artifacts.csv_content, encoding="utf-8")
    db_json_path.write_text(artifacts.json_content, encoding="utf-8")

    logger.debug(f"Wrote models.csv and db.json for text_generation to {self._temp_dir}")

_commit_changes

_commit_changes(
    category: MODEL_REFERENCE_CATEGORY,
    diff: ModelReferenceDiff,
) -> bool

Commit the changes to the repository.

Uses --no-gpg-sign to bypass GPG signing requirements for automated commits. This prevents issues when running in environments without GPG configured.

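Parameters:

  • category (MODEL_REFERENCE_CATEGORY) –

    The category being synced.

  • diff (ModelReferenceDiff) –

    The diff summary for generating the commit message.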

Returns:

  • bool

    True if changes were committed, False if there were no changes to commit.

Source code in src/horde_model_reference/sync/github_client.py
def _commit_changes(
    self,
    category: MODEL_REFERENCE_CATEGORY,
    diff: ModelReferenceDiff,
) -> bool:
    """Stage everything in the working tree and commit it, if anything changed.

    The commit is made with --no-gpg-sign so automated runs do not depend on
    a GPG setup being present in the environment.

    Args:
        category (MODEL_REFERENCE_CATEGORY): The category being synced.
        diff: The diff summary used to build the commit message.

    Returns:
        True if a commit was created, False if staging produced no changes.

    Raises:
        RuntimeError: If no repository has been cloned.

    """
    repo = self._current_repo
    if not repo:
        raise RuntimeError("No repository cloned")

    repo.git.add(".")

    if repo.is_dirty():
        commit_message = self._generate_commit_message(category, diff)
        logger.debug(f"Committing with message:\n{commit_message}")

        repo.git.commit("-m", commit_message, "--no-gpg-sign")
        logger.debug("Changes committed successfully")
        return True

    # Nothing staged even though the comparator reported differences.
    logger.warning(
        f"No actual file changes for {category} despite comparator detecting "
        f"{diff.total_changes()} differences. This indicates the comparison "
        "produced false positives (e.g. due to JSON parser inconsistencies)."
    )
    return False

_push_branch

_push_branch(branch_name: str) -> None

Push the branch to the remote repository.

Parameters:

  • branch_name (str) –

    The name of the branch to push.

Source code in src/horde_model_reference/sync/github_client.py
def _push_branch(self, branch_name: str) -> None:
    """Push the branch to the remote repository.

    The push goes to the URL returned by ``_get_authenticated_repo_url``
    rather than to the configured remote.

    Args:
        branch_name: The name of the branch to push.

    Raises:
        RuntimeError: If no repository has been cloned.
        Exception: Any error raised while resolving the URL or pushing is
            logged and re-raised unchanged.

    """
    if not self._current_repo:
        raise RuntimeError("No repository cloned")

    repo_url_with_auth = None
    try:
        repo_url_with_auth = self._get_authenticated_repo_url()

        logger.info(f"Pushing branch {branch_name}")
        self._current_repo.git.push(repo_url_with_auth, branch_name)
        logger.debug("Branch pushed successfully")
    except Exception as e:
        # The push URL carries the access token in its userinfo part, and the
        # error text may echo that URL. Scrub the credential from what we log;
        # the re-raised exception itself is left untouched for callers.
        detail = str(e)
        if repo_url_with_auth:
            parsed = urlparse(repo_url_with_auth)
            secret = parsed.password or parsed.username
            if secret and parsed.scheme in ("http", "https"):
                detail = detail.replace(secret, "***")
        logger.error(f"Failed to push branch {branch_name}: {detail}")
        raise

_get_authenticated_repo_url

_get_authenticated_repo_url() -> str

Get the repository URL with authentication token.

Returns:

  • str

    The authenticated repository URL.

Source code in src/horde_model_reference/sync/github_client.py
def _get_authenticated_repo_url(self) -> str:
    """Build the push URL for the ``origin`` remote, embedding credentials when possible.

    Only remotes whose host is github.com are rewritten. For those, the
    GitHub App installation token is preferred; if it is unavailable the
    personal access token from settings is used. In every other case the
    remote URL is returned exactly as configured.

    Returns:
        The authenticated repository URL, or the unmodified remote URL when
        no credentials apply.

    Raises:
        RuntimeError: If no repository has been cloned.

    """
    if not self._current_repo:
        raise RuntimeError("No repository cloned")

    remote_url = self._current_repo.remote("origin").url

    # Work from a credential-free URL so stale auth is never carried over.
    clean_url = self._strip_auth_from_url(remote_url)

    host = urlparse(clean_url).hostname
    if not host or host.lower() != "github.com":
        return remote_url

    repo_path = self._parse_repo_name_from_url(clean_url)

    # First choice: the installation auth object stored during initialization.
    if self._installation_auth is not None:
        try:
            app_token = self._installation_auth.token
            return f"https://x-access-token:{app_token}@github.com/{repo_path}.git"
        except Exception as e:
            # Not fatal: continue on to the personal access token.
            logger.warning(f"Failed to get GitHub App token for push: {e}")

    # Second choice: personal access token.
    personal_token = self.settings.github_token
    if personal_token:
        return f"https://{personal_token}@github.com/{repo_path}.git"

    return remote_url

_find_existing_sync_prs

_find_existing_sync_prs(
    repo_name: str,
    category: MODEL_REFERENCE_CATEGORY | None = None,
) -> list[Any]

Find existing open PRs created by the sync service.

Parameters:

  • repo_name (str) –

    Repository in 'owner/repo' format.

  • category (MODEL_REFERENCE_CATEGORY | None, default: None ) –

    Optional category to filter PRs. If None, finds all sync PRs.

Returns:

  • list[Any]

    List of open pull request objects created by the sync service.

Source code in src/horde_model_reference/sync/github_client.py
def _find_existing_sync_prs(self, repo_name: str, category: MODEL_REFERENCE_CATEGORY | None = None) -> list[Any]:
    """Collect the open pull requests whose head branch was created by the sync service.

    A PR qualifies when its head ref starts with ``sync/``. If a category is
    given, the ref must additionally contain ``sync/<category>/`` or
    ``sync/multi-category/``.

    Args:
        repo_name: Repository in 'owner/repo' format.
        category: Optional category to filter PRs. If None, finds all sync PRs.

    Returns:
        List of matching open pull request objects; an empty list if the
        GitHub API call fails.

    Raises:
        RuntimeError: If the GitHub client has not been initialized.

    """
    if not self._github_client:
        raise RuntimeError("GitHub client not initialized")

    def is_sync_ref(ref: str) -> bool:
        # Must be a sync branch, and (when filtering) belong to this category
        # or to a multi-category sync.
        if not ref.startswith("sync/"):
            return False
        if category is None:
            return True
        return f"sync/{category}/" in ref or "sync/multi-category/" in ref

    try:
        repo = self._github_client.get_repo(repo_name)
        candidates = repo.get_pulls(state="open", sort="created", direction="desc")
        return [pr for pr in candidates if is_sync_ref(pr.head.ref)]
    except GithubException as e:
        logger.warning(f"Failed to find existing sync PRs: {e}")
        return []

_close_existing_sync_prs

_close_existing_sync_prs(
    repo_name: str,
    category: MODEL_REFERENCE_CATEGORY | None = None,
) -> None

Close existing open PRs created by the sync service.

Parameters:

  • repo_name (str) –

    Repository in 'owner/repo' format.

  • category (MODEL_REFERENCE_CATEGORY | None, default: None ) –

    Optional category to filter PRs. If None, closes all sync PRs.

Source code in src/horde_model_reference/sync/github_client.py
def _close_existing_sync_prs(self, repo_name: str, category: MODEL_REFERENCE_CATEGORY | None = None) -> None:
    """Comment on and close every open sync PR returned by ``_find_existing_sync_prs``.

    Failures on an individual PR are logged as warnings and do not stop the
    remaining PRs from being processed.

    Args:
        repo_name: Repository in 'owner/repo' format.
        category: Optional category to filter PRs. If None, closes all sync PRs.

    """
    stale_prs = self._find_existing_sync_prs(repo_name, category)
    if not stale_prs:
        logger.debug(f"No existing sync PRs found for {repo_name}")
        return

    logger.info(f"Found {len(stale_prs)} existing sync PR(s) to close")

    # Same explanatory note is posted on each PR before it is closed.
    closing_note = (
        "This PR is being automatically closed because a new sync operation has been initiated.\n\n"
        "A new PR with updated changes will be created shortly."
    )

    for stale_pr in stale_prs:
        try:
            stale_pr.create_issue_comment(closing_note)
            stale_pr.edit(state="closed")
            logger.info(f"Closed PR #{stale_pr.number}: {stale_pr.title}")
        except GithubException as e:
            logger.warning(f"Failed to close PR #{stale_pr.number}: {e}")

_create_pull_request

_create_pull_request(
    category: MODEL_REFERENCE_CATEGORY,
    diff: ModelReferenceDiff,
    repo_name: str,
    branch_name: str,
    github_settings: GithubRepoSettings,
) -> str

Create a pull request for the sync.

Parameters:

  • category (MODEL_REFERENCE_CATEGORY) –

    The category being synced.

  • diff (ModelReferenceDiff) –

    The diff summary for generating PR description.

  • repo_name (str) –

    Repository in 'owner/repo' format.

  • branch_name (str) –

    The name of the branch to create PR from.

  • github_settings (GithubRepoSettings) –

    GitHub repository settings containing branch name.

Returns:

  • str

    The URL of the created PR.

Source code in src/horde_model_reference/sync/github_client.py
def _create_pull_request(
    self,
    category: MODEL_REFERENCE_CATEGORY,
    diff: ModelReferenceDiff,
    repo_name: str,
    branch_name: str,
    github_settings: GithubRepoSettings,
) -> str:
    """Create a pull request for the sync.

    Existing open sync PRs for the category are closed first. After the PR
    is created, labels, reviewers and the team review request are applied on
    a best-effort basis: a failure in any of them is logged as a warning and
    the PR URL is still returned.

    Args:
        category (MODEL_REFERENCE_CATEGORY): The category being synced.
        diff: The diff summary for generating PR description.
        repo_name: Repository in 'owner/repo' format.
        branch_name: The name of the branch to create PR from.
        github_settings: GitHub repository settings; its ``branch`` is used as the PR base.

    Returns:
        The URL of the created PR.

    Raises:
        RuntimeError: If the GitHub client has not been initialized.
        GithubException: If the repository lookup or PR creation fails.

    """
    if not self._github_client:
        raise RuntimeError("GitHub client not initialized")

    # Close any existing sync PRs for this category
    self._close_existing_sync_prs(repo_name, category)

    try:
        repo = self._github_client.get_repo(repo_name)
        title = self._generate_pr_title(category)
        body = self._generate_pr_body(category, diff)

        logger.info(f"Creating PR: {title}")

        pr = repo.create_pull(
            title=title,
            body=body,
            head=branch_name,
            base=github_settings.branch,
        )

        if self.settings.pr_labels:
            # The PR already exists here; a labelling failure must not be
            # surfaced as "Failed to create PR" and lose the PR URL.
            try:
                pr.add_to_labels(*self.settings.pr_labels)
            except GithubException as e:
                logger.warning(f"Failed to add labels: {e}")

        if self.settings.pr_reviewers:
            try:
                pr.create_review_request(reviewers=self.settings.pr_reviewers)
            except GithubException as e:
                logger.warning(f"Failed to assign reviewers: {e}")

        if self.settings.pr_auto_assign_team:
            try:
                # Only the last path segment (the part after any "/") is sent as the team slug.
                team_slug = self.settings.pr_auto_assign_team.split("/")[-1]
                pr.create_review_request(team_reviewers=[team_slug])
            except GithubException as e:
                logger.warning(f"Failed to assign team: {e}")

        return pr.html_url

    except GithubException as e:
        logger.error(f"Failed to create PR: {e}")
        raise

_generate_commit_message

_generate_commit_message(
    category: MODEL_REFERENCE_CATEGORY,
    diff: ModelReferenceDiff,
) -> str

Generate a commit message from the diff.

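Parameters:

  • category (MODEL_REFERENCE_CATEGORY) –

    The category being synced.

  • diff (ModelReferenceDiff) –

    The diff summary.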

Returns:

  • str

    The commit message.

Source code in src/horde_model_reference/sync/github_client.py
def _generate_commit_message(
    self,
    category: MODEL_REFERENCE_CATEGORY,
    diff: ModelReferenceDiff,
) -> str:
    """Build the commit message for a single-category sync.

    The message has a subject line, one section per non-empty change kind
    (added, removed, modified) listing at most ten model names in sorted
    order, and a trailing attribution line.

    Args:
        category (MODEL_REFERENCE_CATEGORY): The category being synced.
        diff: The diff summary.

    Returns:
        The commit message.

    """
    max_listed = 10
    lines = [f"Sync {category} from PRIMARY instance", ""]

    # (section heading, bullet marker, models). The removed/modified headings
    # carry a leading newline so they are separated from whatever precedes them.
    sections = (
        ("Added", "+", diff.added_models),
        ("\nRemoved", "-", diff.removed_models),
        ("\nModified", "~", diff.modified_models),
    )
    for heading, marker, models in sections:
        if not models:
            continue
        lines.append(f"{heading} {len(models)} models:")
        lines.extend(f"  {marker} {model_name}" for model_name in sorted(models.keys())[:max_listed])
        hidden = len(models) - max_listed
        if hidden > 0:
            lines.append(f"  ... and {hidden} more")

    lines.extend(["", "Generated by horde-model-reference GitHub sync service"])
    return "\n".join(lines)

_generate_pr_title

_generate_pr_title(
    category: MODEL_REFERENCE_CATEGORY,
) -> str

Generate a PR title.

Parameters:

  • category (MODEL_REFERENCE_CATEGORY) –

    The category being synced.

Returns:

  • str

    The PR title.

Source code in src/horde_model_reference/sync/github_client.py
def _generate_pr_title(self, category: MODEL_REFERENCE_CATEGORY) -> str:
    """Build the title for a single-category sync PR.

    The title ends with the current local date in YYYY-MM-DD form.

    Args:
        category (MODEL_REFERENCE_CATEGORY): The category being synced.

    Returns:
        The PR title.

    """
    return f"Auto Sync {category} from horde_model_reference service - {datetime.now():%Y-%m-%d}"

_generate_pr_body

_generate_pr_body(
    category: MODEL_REFERENCE_CATEGORY,
    diff: ModelReferenceDiff,
) -> str

Generate a PR description from the diff.

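Parameters:

  • category (MODEL_REFERENCE_CATEGORY) –

    The category being synced.

  • diff (ModelReferenceDiff) –

    The diff summary.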

Returns:

  • str

    The PR body in Markdown format.

Source code in src/horde_model_reference/sync/github_client.py
def _generate_pr_body(
    self,
    category: MODEL_REFERENCE_CATEGORY,
    diff: ModelReferenceDiff,
) -> str:
    """Build the Markdown description for a single-category sync PR.

    The body contains an intro, the source URL (when one is configured in
    settings), per-kind change counts, and for each non-empty change kind a
    sorted list of at most twenty model names.

    Args:
        category (MODEL_REFERENCE_CATEGORY): The category being synced.
        diff: The diff summary.

    Returns:
        The PR body in Markdown format.

    """
    max_listed = 20

    lines = [
        "## Automated Sync from horde_model_reference service",
        "",
        f"This PR synchronizes the `{category}` model references from the horde_model_reference service.",
        "",
        "### Changes Summary",
        "",
    ]

    source_url = self.settings.primary_api_url
    if source_url:
        lines += [f"**Source:** {source_url}", ""]

    lines += [
        f"- **Added:** {len(diff.added_models)} models",
        f"- **Removed:** {len(diff.removed_models)} models",
        f"- **Modified:** {len(diff.modified_models)} models",
        f"- **Total Changes:** {diff.total_changes()}",
        "",
    ]

    # One "#### <Kind> Models" section per change kind that has entries.
    sections = (
        ("Added", diff.added_models),
        ("Removed", diff.removed_models),
        ("Modified", diff.modified_models),
    )
    for kind, models in sections:
        if not models:
            continue
        lines += [f"#### {kind} Models", ""]
        lines += [f"- `{model_name}`" for model_name in sorted(models.keys())[:max_listed]]
        if len(models) > max_listed:
            lines.append(f"- ... and {len(models) - max_listed} more")
        lines.append("")

    lines += [
        "---",
        "",
        "*This PR was automatically generated by the horde-model-reference GitHub sync service.*",
        "",
        "Please review the changes carefully before merging. "
        "If you notice any issues, contact the horde_model_reference service administrator.",
    ]

    return "\n".join(lines)

_create_multi_category_sync_branch

_create_multi_category_sync_branch(
    categories: list[MODEL_REFERENCE_CATEGORY],
) -> str

Create a new branch for multi-category sync operation.

Parameters:

  • categories (list[MODEL_REFERENCE_CATEGORY]) –

    The list of categories being synced.

Returns:

  • str

    The name of the created branch.

Source code in src/horde_model_reference/sync/github_client.py
def _create_multi_category_sync_branch(self, categories: list[MODEL_REFERENCE_CATEGORY]) -> str:
    """Check out a fresh ``sync/multi-category/<timestamp>`` branch in the cloned repository.

    The timestamp is the current local time formatted as YYYYMMDD-HHMMSS.
    The ``categories`` argument does not influence the branch name.

    Args:
        categories: The list of categories being synced.

    Returns:
        The name of the created branch.

    Raises:
        RuntimeError: If no repository has been cloned.

    """
    repo = self._current_repo
    if not repo:
        raise RuntimeError("No repository cloned")

    branch_name = f"sync/multi-category/{datetime.now():%Y%m%d-%H%M%S}"
    logger.debug(f"Creating multi-category branch: {branch_name}")

    repo.git.checkout("-b", branch_name)
    return branch_name

_commit_multi_category_changes

_commit_multi_category_changes(
    categories_data: dict[
        MODEL_REFERENCE_CATEGORY,
        tuple[
            ModelReferenceDiff,
            dict[str, dict[str, Any]],
            TextGenerationSyncArtifacts | None,
        ],
    ],
) -> bool

Commit changes for multiple categories.

Uses --no-gpg-sign to bypass GPG signing requirements for automated commits. This prevents issues when running in environments without GPG configured.

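Parameters:

  • categories_data (dict[MODEL_REFERENCE_CATEGORY, tuple[ModelReferenceDiff, dict[str, dict[str, Any]], TextGenerationSyncArtifacts | None]]) –

    Dict mapping categories to (diff, primary_data, artifacts) tuples.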

Returns:

  • bool

    True if changes were committed, False if there were no changes to commit.

Source code in src/horde_model_reference/sync/github_client.py
def _commit_multi_category_changes(
    self,
    categories_data: dict[
        MODEL_REFERENCE_CATEGORY,
        tuple[ModelReferenceDiff, dict[str, dict[str, Any]], TextGenerationSyncArtifacts | None],
    ],
) -> bool:
    """Stage everything in the working tree and commit it as one multi-category sync.

    The commit is made with --no-gpg-sign so automated runs do not depend on
    a GPG setup being present in the environment.

    Args:
        categories_data: Dict mapping categories to (diff, primary_data, artifacts) tuples.

    Returns:
        True if a commit was created, False if staging produced no changes.

    Raises:
        RuntimeError: If no repository has been cloned.

    """
    repo = self._current_repo
    if not repo:
        raise RuntimeError("No repository cloned")

    repo.git.add(".")

    if repo.is_dirty():
        commit_message = self._generate_multi_category_commit_message(categories_data)
        logger.debug(f"Committing with message:\n{commit_message}")

        repo.git.commit("-m", commit_message, "--no-gpg-sign")
        logger.debug("Changes committed successfully")
        return True

    # Nothing staged even though the comparator reported differences.
    total = sum(entry_diff.total_changes() for entry_diff, _, _ in categories_data.values())
    logger.warning(
        f"No actual file changes despite comparator detecting {total} total "
        "differences across categories. This indicates the comparison "
        "produced false positives (e.g. due to JSON parser inconsistencies)."
    )
    return False

_generate_multi_category_commit_message

_generate_multi_category_commit_message(
    categories_data: dict[
        MODEL_REFERENCE_CATEGORY,
        tuple[
            ModelReferenceDiff,
            dict[str, dict[str, Any]],
            TextGenerationSyncArtifacts | None,
        ],
    ],
) -> str

Generate a commit message for multi-category sync.

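Parameters:

  • categories_data (dict[MODEL_REFERENCE_CATEGORY, tuple[ModelReferenceDiff, dict[str, dict[str, Any]], TextGenerationSyncArtifacts | None]]) –

    Dict mapping categories to (diff, primary_data, artifacts) tuples.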

Returns:

  • str

    The commit message.

Source code in src/horde_model_reference/sync/github_client.py
def _generate_multi_category_commit_message(
    self,
    categories_data: dict[
        MODEL_REFERENCE_CATEGORY,
        tuple[ModelReferenceDiff, dict[str, dict[str, Any]], TextGenerationSyncArtifacts | None],
    ],
) -> str:
    """Build the commit message for a sync that touches several categories.

    After a header naming the categories (sorted) and the total change
    count, each category gets a ``## <category>`` section listing at most
    five sorted model names per non-empty change kind.

    Args:
        categories_data: Dict mapping categories to (diff, primary_data, artifacts) tuples.

    Returns:
        The commit message.

    """
    max_listed = 5
    ordered_categories = sorted(categories_data.keys())

    category_names = ", ".join(str(cat) for cat in ordered_categories)
    total_changes = sum(entry_diff.total_changes() for entry_diff, _, _ in categories_data.values())

    lines = [
        "Sync multiple categories from PRIMARY instance",
        "",
        f"Categories: {category_names}",
        f"Total changes: {total_changes}",
        "",
    ]

    for category in ordered_categories:
        diff, _primary_data, _artifacts = categories_data[category]
        lines.append(f"## {category}")

        # (verb, bullet marker, models) for each change kind, in output order.
        sections = (
            ("Added", "+", diff.added_models),
            ("Removed", "-", diff.removed_models),
            ("Modified", "~", diff.modified_models),
        )
        for verb, marker, models in sections:
            if not models:
                continue
            lines.append(f"{verb} {len(models)} models:")
            lines.extend(f"  {marker} {model_name}" for model_name in sorted(models.keys())[:max_listed])
            hidden = len(models) - max_listed
            if hidden > 0:
                lines.append(f"  ... and {hidden} more")

        lines.append("")

    lines.append("Generated by horde-model-reference GitHub sync service")
    return "\n".join(lines)

_create_multi_category_pull_request

_create_multi_category_pull_request(
    categories_data: dict[
        MODEL_REFERENCE_CATEGORY,
        tuple[
            ModelReferenceDiff,
            dict[str, dict[str, Any]],
            TextGenerationSyncArtifacts | None,
        ],
    ],
    repo_name: str,
    branch_name: str,
    github_settings: GithubRepoSettings,
) -> str

Create a pull request for multi-category sync.

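Parameters:

  • categories_data (dict[MODEL_REFERENCE_CATEGORY, tuple[ModelReferenceDiff, dict[str, dict[str, Any]], TextGenerationSyncArtifacts | None]]) –

    Dict mapping categories to (diff, primary_data, artifacts) tuples.

  • repo_name (str) –

    Repository in 'owner/repo' format.

  • branch_name (str) –

    The name of the branch to create PR from.

  • github_settings (GithubRepoSettings) –

    GitHub repository settings containing branch name.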

Returns:

  • str

    The URL of the created PR.

Source code in src/horde_model_reference/sync/github_client.py
def _create_multi_category_pull_request(
    self,
    categories_data: dict[
        MODEL_REFERENCE_CATEGORY,
        tuple[ModelReferenceDiff, dict[str, dict[str, Any]], TextGenerationSyncArtifacts | None],
    ],
    repo_name: str,
    branch_name: str,
    github_settings: GithubRepoSettings,
) -> str:
    """Create a pull request for multi-category sync.

    All existing open sync PRs in the repository are closed first. After
    the PR is created, labels, reviewers and the team review request are
    applied on a best-effort basis: a failure in any of them is logged as a
    warning and the PR URL is still returned.

    Args:
        categories_data: Dict mapping categories to (diff, primary_data, artifacts) tuples.
        repo_name: Repository in 'owner/repo' format.
        branch_name: The name of the branch to create PR from.
        github_settings: GitHub repository settings; its ``branch`` is used as the PR base.

    Returns:
        The URL of the created PR.

    Raises:
        RuntimeError: If the GitHub client has not been initialized.
        GithubException: If the repository lookup or PR creation fails.

    """
    if not self._github_client:
        raise RuntimeError("GitHub client not initialized")

    # Close any existing sync PRs for this repository
    # For multi-category PRs, we close all sync PRs regardless of category
    self._close_existing_sync_prs(repo_name, category=None)

    try:
        repo = self._github_client.get_repo(repo_name)
        title = self._generate_multi_category_pr_title(list(categories_data.keys()))
        body = self._generate_multi_category_pr_body(categories_data)

        logger.info(f"Creating multi-category PR: {title}")

        pr = repo.create_pull(
            title=title,
            body=body,
            head=branch_name,
            base=github_settings.branch,
        )

        if self.settings.pr_labels:
            # The PR already exists here; a labelling failure must not be
            # surfaced as "Failed to create PR" and lose the PR URL.
            try:
                pr.add_to_labels(*self.settings.pr_labels)
            except GithubException as e:
                logger.warning(f"Failed to add labels: {e}")

        if self.settings.pr_reviewers:
            try:
                pr.create_review_request(reviewers=self.settings.pr_reviewers)
            except GithubException as e:
                logger.warning(f"Failed to assign reviewers: {e}")

        if self.settings.pr_auto_assign_team:
            try:
                # Only the last path segment (the part after any "/") is sent as the team slug.
                team_slug = self.settings.pr_auto_assign_team.split("/")[-1]
                pr.create_review_request(team_reviewers=[team_slug])
            except GithubException as e:
                logger.warning(f"Failed to assign team: {e}")

        return pr.html_url

    except GithubException as e:
        logger.error(f"Failed to create PR: {e}")
        raise

_generate_multi_category_pr_title

_generate_multi_category_pr_title(
    categories: list[MODEL_REFERENCE_CATEGORY],
) -> str

Generate a PR title for multi-category sync.

Parameters:

  • categories (list[MODEL_REFERENCE_CATEGORY]) –

    The list of categories being synced.

Returns:

  • str

    The PR title.

Source code in src/horde_model_reference/sync/github_client.py
def _generate_multi_category_pr_title(self, categories: list[MODEL_REFERENCE_CATEGORY]) -> str:
    """Build the title for a multi-category sync PR.

    The title is a fixed phrase followed by the current local date in
    YYYY-MM-DD form; the ``categories`` argument does not appear in it.

    Args:
        categories: The list of categories being synced.

    Returns:
        The PR title.

    """
    return f"Sync multiple categories from PRIMARY instance - {datetime.now():%Y-%m-%d}"

_generate_multi_category_pr_body

_generate_multi_category_pr_body(
    categories_data: dict[
        MODEL_REFERENCE_CATEGORY,
        tuple[
            ModelReferenceDiff,
            dict[str, dict[str, Any]],
            TextGenerationSyncArtifacts | None,
        ],
    ],
) -> str

Generate a PR description for multi-category sync.

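Parameters:

  • categories_data (dict[MODEL_REFERENCE_CATEGORY, tuple[ModelReferenceDiff, dict[str, dict[str, Any]], TextGenerationSyncArtifacts | None]]) –

    Dict mapping categories to (diff, primary_data, artifacts) tuples.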

Returns:

  • str

    The PR body in Markdown format.

Source code in src/horde_model_reference/sync/github_client.py
def _generate_multi_category_pr_body(
    self,
    categories_data: dict[
        MODEL_REFERENCE_CATEGORY,
        tuple[ModelReferenceDiff, dict[str, dict[str, Any]], TextGenerationSyncArtifacts | None],
    ],
) -> str:
    """Render the Markdown description for a pull request covering several categories.

    The body consists of an overall summary (category list, optional source
    URL taken from ``self.settings.primary_api_url``, and aggregate counts),
    then one section per category in sorted order, then a fixed footer.
    Each category section lists at most ten model names per change type
    (sorted by name) and summarizes any remainder as "... and N more".

    Args:
        categories_data: Dict mapping categories to (diff, primary_data, artifacts)
            tuples. Only the diff element of each tuple is read here.

    Returns:
        The PR body in Markdown format.

    """
    preview_limit = 10  # maximum model names listed per change type

    ordered_categories = sorted(categories_data.keys())
    diff_for = {category: diff for category, (diff, _, _) in categories_data.items()}

    # Aggregate the per-category counts in a single pass.
    added_total = removed_total = modified_total = 0
    for category_diff in diff_for.values():
        added_total += len(category_diff.added_models)
        removed_total += len(category_diff.removed_models)
        modified_total += len(category_diff.modified_models)

    category_list = ", ".join(f"`{category}`" for category in ordered_categories)
    body = [
        "## Automated Multi-Category Sync from PRIMARY Instance",
        "",
        f"This PR synchronizes **{len(categories_data)} categories** from the PRIMARY instance.",
        "",
        f"**Categories:** {category_list}",
        "",
        "### Overall Changes Summary",
        "",
    ]

    source_url = self.settings.primary_api_url
    if source_url:
        body += [f"**Source:** {source_url}", ""]

    body += [
        f"- **Total Added:** {added_total} models",
        f"- **Total Removed:** {removed_total} models",
        f"- **Total Modified:** {modified_total} models",
        f"- **Total Changes:** {added_total + removed_total + modified_total}",
        "",
    ]

    for category in ordered_categories:
        category_diff = diff_for[category]
        # The three change types share one layout, so drive them from a table.
        change_groups = (
            ("Added", category_diff.added_models),
            ("Removed", category_diff.removed_models),
            ("Modified", category_diff.modified_models),
        )

        body += [f"### {category}", ""]
        body.extend(f"- **{label}:** {len(models)} models" for label, models in change_groups)
        body.append("")

        for label, models in change_groups:
            if not models:
                continue
            body.append(f"**{label} Models:**")
            names = sorted(models.keys())
            body.extend(f"- `{name}`" for name in names[:preview_limit])
            overflow = len(models) - preview_limit
            if overflow > 0:
                body.append(f"- ... and {overflow} more")
            body.append("")

    body += [
        "---",
        "",
        "*This PR was automatically generated by the horde-model-reference GitHub sync service.*",
        "",
        "Please review the changes carefully before merging. "
        "If you notice any issues, contact the PRIMARY instance administrator.",
    ]

    return "\n".join(body)