diff --git a/notebook_filter_duplicate_muuid.ipynb b/notebook_filter_duplicate_muuid.ipynb index 6804f82..3ebbedc 100644 --- a/notebook_filter_duplicate_muuid.ipynb +++ b/notebook_filter_duplicate_muuid.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 1, + "execution_count": 20, "id": "9c599d5c-925e-40e1-aac8-ec59048106b2", "metadata": {}, "outputs": [], @@ -116,103 +116,103 @@ "name": "stdout", "output_type": "stream", "text": [ - "docs 4809 keep 8263 2024-10-25 22:53:41.055078\n", - "deleted 4809\n", - "docs 5202 keep 9251 2024-10-22 22:53:49.704345\n", - "deleted 5202\n", - "docs 6394 keep 10236 2024-10-19 22:53:59.064843\n", - "deleted 6394\n", - "docs 5596 keep 8955 2024-10-16 22:54:10.537684\n", - "deleted 5596\n", - "docs 7687 keep 16842 2024-10-13 22:54:18.863983\n", - "deleted 7687\n", - "docs 5095 keep 11457 2024-10-10 22:54:31.001212\n", - "deleted 5095\n", - "docs 7062 keep 12480 2024-10-07 22:54:39.862712\n", - "deleted 7062\n", - "docs 5391 keep 9881 2024-10-04 22:54:49.507628\n", - "deleted 5391\n", - "docs 5681 keep 9687 2024-10-01 22:54:59.980359\n", + "docs 4288 keep 7324 2024-12-14 14:05:22.556333\n", + "deleted 4288\n", + "docs 4482 keep 7151 2024-12-11 14:05:29.391986\n", + "deleted 4482\n", + "docs 5681 keep 8889 2024-12-08 14:05:36.468924\n", "deleted 5681\n", - "docs 5990 keep 10201 2024-09-28 22:55:10.009160\n", - "deleted 5990\n", - "docs 4895 keep 7525 2024-09-25 22:55:19.284274\n", - "deleted 4895\n", - "docs 6702 keep 10153 2024-09-22 22:55:27.048955\n", - "deleted 6702\n", - "docs 5954 keep 9607 2024-09-19 22:55:37.265990\n", - "deleted 5954\n", - "docs 7056 keep 11205 2024-09-16 22:55:47.847675\n", - "deleted 7056\n", - "docs 5442 keep 8972 2024-09-13 22:55:57.566336\n", - "deleted 5442\n", - "docs 5719 keep 9664 2024-09-10 22:56:08.222235\n", - "deleted 5719\n", - "docs 6330 keep 11182 2024-09-07 22:56:19.691088\n", - "deleted 6330\n", - "docs 5749 keep 9011 2024-09-04 22:56:29.597109\n", - "deleted 
5749\n", - "docs 4539 keep 17305 2024-09-01 22:56:39.896055\n", - "deleted 4539\n", - "docs 0 keep 8459 2024-08-29 22:56:47.370896\n", + "docs 4460 keep 12776 2024-12-05 14:05:42.185975\n", + "deleted 4460\n", + "docs 6009 keep 9659 2024-12-02 14:05:50.879045\n", + "deleted 6009\n", + "docs 4871 keep 8176 2024-11-29 14:05:58.325674\n", + "deleted 4871\n", + "docs 5326 keep 8458 2024-11-26 14:06:04.575170\n", + "deleted 5326\n", + "docs 5132 keep 8726 2024-11-23 14:06:09.265128\n", + "deleted 5132\n", + "docs 5220 keep 8210 2024-11-20 14:06:15.326936\n", + "deleted 5220\n", + "docs 5906 keep 9375 2024-11-17 14:06:26.989688\n", + "deleted 5906\n", + "docs 4307 keep 10899 2024-11-14 14:06:34.458271\n", + "deleted 4307\n", + "docs 6155 keep 9973 2024-11-11 14:06:41.698526\n", + "deleted 6155\n", + "docs 4157 keep 7218 2024-11-08 14:06:48.889393\n", + "deleted 4157\n", + "docs 4647 keep 7418 2024-11-05 14:06:54.825495\n", + "deleted 4647\n", + "docs 4759 keep 8145 2024-11-02 14:07:01.155866\n", + "deleted 4759\n", + "docs 5166 keep 8793 2024-10-30 14:07:07.480654\n", + "deleted 5166\n", + "docs 5114 keep 10616 2024-10-27 14:07:14.055395\n", + "deleted 5114\n", + "docs 0 keep 8037 2024-10-24 14:07:21.814413\n", "deleted 0\n", - "docs 0 keep 9501 2024-08-26 22:56:53.881742\n", + "docs 0 keep 9635 2024-10-21 14:07:27.152091\n", "deleted 0\n", - "docs 0 keep 8938 2024-08-23 22:56:56.979955\n", + "docs 0 keep 7868 2024-10-18 14:07:29.766024\n", "deleted 0\n", - "docs 0 keep 9494 2024-08-20 22:57:00.050195\n", + "docs 0 keep 14626 2024-10-15 14:07:32.108514\n", "deleted 0\n", - "docs 0 keep 9751 2024-08-17 22:57:03.106542\n", + "docs 0 keep 8379 2024-10-12 14:07:34.865917\n", "deleted 0\n", - "docs 0 keep 8361 2024-08-14 22:57:05.995063\n", + "docs 0 keep 10503 2024-10-09 14:07:37.267487\n", "deleted 0\n", - "docs 0 keep 9869 2024-08-11 22:57:08.531542\n", + "docs 0 keep 11323 2024-10-06 14:07:40.096856\n", "deleted 0\n", - "docs 0 keep 11854 2024-08-08 22:57:11.194938\n", + 
"docs 0 keep 7814 2024-10-03 14:07:43.004647\n", "deleted 0\n", - "docs 0 keep 9960 2024-08-05 22:57:13.926141\n", + "docs 0 keep 9331 2024-09-30 14:07:45.329096\n", "deleted 0\n", - "docs 186 keep 7474 2024-08-02 22:57:16.782706\n", - "deleted 186\n", - "docs 0 keep 10631 2024-07-30 22:57:19.782496\n", + "docs 254 keep 8114 2024-09-27 14:07:47.954207\n", + "deleted 254\n", + "docs 0 keep 7001 2024-09-24 14:07:50.325700\n", "deleted 0\n", - "docs 0 keep 10398 2024-07-27 22:57:22.834446\n", + "docs 0 keep 9632 2024-09-21 14:07:52.686749\n", "deleted 0\n", - "docs 0 keep 11864 2024-07-24 22:57:26.434423\n", + "docs 0 keep 8821 2024-09-18 14:07:55.271608\n", "deleted 0\n", - "docs 0 keep 15282 2024-07-21 22:57:29.925974\n", + "docs 0 keep 9489 2024-09-15 14:07:57.634515\n", + "deleted 0\n", + "docs 0 keep 7284 2024-09-12 14:08:00.213872\n", + "deleted 0\n", + "docs 0 keep 10056 2024-09-09 14:08:02.525974\n", "deleted 0\n", "Run complete!\n", - "docs 2720 keep 28259 2024-10-25 22:57:33.859147\n", - "deleted 2720\n", - "docs 2789 keep 39140 2024-10-15 22:57:41.617063\n", - "deleted 2789\n", - "docs 2578 keep 31777 2024-10-05 22:57:52.278779\n", - "deleted 2578\n", - "docs 2608 keep 28332 2024-09-25 22:58:00.310342\n", - "deleted 2608\n", - "docs 2837 keep 31592 2024-09-15 22:58:09.532103\n", - "deleted 2837\n", - "docs 1751 keep 35962 2024-09-05 22:58:18.074457\n", - "deleted 1751\n", - "docs 0 keep 31236 2024-08-26 22:58:27.413320\n", + "docs 2322 keep 28825 2024-12-14 14:08:05.219277\n", + "deleted 2322\n", + "docs 2327 keep 25721 2024-12-04 14:08:10.084122\n", + "deleted 2327\n", + "docs 2558 keep 27000 2024-11-24 14:08:16.895495\n", + "deleted 2558\n", + "docs 2362 keep 28498 2024-11-14 14:08:22.828700\n", + "deleted 2362\n", + "docs 2543 keep 26762 2024-11-04 14:08:28.110198\n", + "deleted 2543\n", + "docs 0 keep 28440 2024-10-25 14:08:32.727657\n", + "deleted 0\n", + "docs 0 keep 38049 2024-10-15 14:08:39.606070\n", "deleted 0\n", - "docs 0 keep 32668 2024-08-16 
22:58:34.745503\n", + "docs 155 keep 29301 2024-10-05 14:08:44.722887\n", + "deleted 155\n", + "docs 0 keep 28043 2024-09-25 14:08:48.948459\n", "deleted 0\n", - "docs 633 keep 31293 2024-08-06 22:58:40.527507\n", - "deleted 633\n", - "docs 0 keep 41446 2024-07-27 22:58:45.678787\n", + "docs 0 keep 29831 2024-09-15 14:08:53.569984\n", "deleted 0\n", "Run complete!\n", - "docs 3614 keep 95566 2024-10-25 22:58:52.564745\n", - "deleted 3614\n", - "docs 3420 keep 92467 2024-09-25 22:59:08.854192\n", - "deleted 3420\n", - "docs 720 keep 94476 2024-08-26 22:59:25.324321\n", - "deleted 720\n", + "docs 3202 keep 78347 2024-12-14 14:08:58.307574\n", + "deleted 3202\n", + "docs 2899 keep 80801 2024-11-14 14:09:08.878483\n", + "deleted 2899\n", + "docs 1382 keep 94014 2024-10-15 14:09:19.773678\n", + "deleted 1382\n", "Run complete!\n", - "CPU times: total: 13.4 s\n", - "Wall time: 5min 59s\n" + "CPU times: total: 5.73 s\n", + "Wall time: 4min 9s\n" ] } ], @@ -247,12 +247,12 @@ "Number of documents to be deleted: 0\n", "Number of documents to be deleted: 0\n", "Number of documents to be deleted: 0\n", - "Number of documents to be deleted: 36916\n", - "Number of documents to be deleted: 95756\n", - "Number of documents to be deleted: 162676\n", - "Number of documents to be deleted: 223773\n", - "Number of documents to be deleted: 290041\n", - "Number of documents to be deleted: 352808\n" + "Number of documents to be deleted: 0\n", + "Number of documents to be deleted: 55623\n", + "Number of documents to be deleted: 110223\n", + "Number of documents to be deleted: 161371\n", + "Number of documents to be deleted: 218958\n", + "Number of documents to be deleted: 272586\n" ] } ], @@ -284,18 +284,18 @@ "Deleted 0 documents.\n", "Number of documents to be deleted: 0\n", "Deleted 0 documents.\n", - "Number of documents to be deleted: 36916\n", - "Deleted 36916 documents.\n", - "Number of documents to be deleted: 58840\n", - "Deleted 58840 documents.\n", - "Number of documents to be 
deleted: 66920\n", - "Deleted 66920 documents.\n", - "Number of documents to be deleted: 61104\n", - "Deleted 61104 documents.\n", - "Number of documents to be deleted: 66267\n", - "Deleted 66267 documents.\n", - "Number of documents to be deleted: 62767\n", - "Deleted 62767 documents.\n" + "Number of documents to be deleted: 0\n", + "Deleted 0 documents.\n", + "Number of documents to be deleted: 55623\n", + "Deleted 55623 documents.\n", + "Number of documents to be deleted: 54600\n", + "Deleted 54600 documents.\n", + "Number of documents to be deleted: 51151\n", + "Deleted 51151 documents.\n", + "Number of documents to be deleted: 57584\n", + "Deleted 57584 documents.\n", + "Number of documents to be deleted: 53638\n", + "Deleted 53638 documents.\n" ] } ], @@ -312,6 +312,101 @@ " print(f\"Deleted {result.deleted_count} documents.\")" ] }, + { + "cell_type": "markdown", + "id": "6a3233d2-e102-4155-ae42-9aefe8756fa0", + "metadata": {}, + "source": [ + "## try to delete every muuid, up to the latest one" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "id": "399486de-c36b-477a-b5eb-9740ac152ebc", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "5 399 388 643 772 568 402 550 576 807 475 648 703 600 940 872 749 422 431 463 665 387 385 586 626 437 489 563 938 529 629 393 614 559 1219 331 428 501 567 441 488 658 447 455 369 503 378 491 640 580 775 319 535 463 468 657 532 646 886 464 593 668 561 384 671 765 455 424 371 744 492 606 896 696 661 559 435 771 585 343 560 963 533 492 719 621 531 517 510 516 579 480 456 391 422 477 393 523 428 561 472 559 325 567 565 511 292 570 389 466 406 487 675 496 646 563 586 572 703 432 531 593 475 624 631 782 662 1611 550 488 526 454 765 513 421 395 469 527 513 487 1000 564 378 663 1007 787 401 469 678 691 531 408 565 1085 776 387 601 497 346 368 595 633 570 683 411 587 486 404 427 416 405 409 633 576 472 688 578 385 698 512 678 481 620 386 428 327 638 489 653 446 419 759 
744 723 798 692 430 625 356 569 599 793 598 482 407 480 637 616 846 781 606 475 363 528 755 582 1000 475 617 612 605 397 705 783 453 598 957 392 478 678 514 471 430 966 417 555 354 357 450 699 360 679 555 425 575 407 761 1409 449 809 543 670 554 840 691 752 521 472 472 405 433 428 595 566 534 455 516 514 610 648 510 748 454 642 585 474 317 608 392 1064 376 772 549 1055 424 481 452 552 536 418 821 451 1166 547 621 532 964 576 499 540 543 565 562 554 545 615 802 639 415 428 407 482 692 724 482 506 553 380 573 1298 631 592 633 520 732 673 425 576 424 504 673 458 761 524 770 506 465 635 294 546 430 634 333 563 395 459 805 1204 712 797 282 903 457 346 1478 682 525 558 614 518 417 1019 798 700 1017 455 500 581 781 691 481 1078 557 645 706 674 605 573 640 482 539 429 398 578 540 441 737 622 463 571 463 823 944 433 507 422 439 999 582 690 301 618 418 687 394 573 745 505 697 573 360 900 549 537 529 382 817 451 592 602 835 452 481 406 446 484 857 656 601 678 437 462 472 293 590 729 364 467 431 445 824 485 541 537 442 1324 516 477 431 709 554 588 672 471 483 341 390 481 667 780 502 633 636 666 384 528 502 411 600 554 840 570 679 643 466 443 504 417 607 1046 349 525 1600 433 665 746 427 667 666 459 402 725 478 429 735 772 543 572 611 1065 823 495 417 421 526 396 728 901 658 524 379 509 383 711 418 600 471 512 460 638 525 523 516 687 979 665 554 825 547 898 586 438 623 430 1308 518 531 685 560 707 517 888 755 437 595 477 514 533 510 530 419 509 620 367 332 619 440 474 420 503 502 845 587 580 504 588 946 733 647 542 494 549 819 559 330 444 652 1047 557 514 521 479 655 409 440 403 304 558 506 363 491 741 404 575 506 568 328 928 548 641 1093 610 388 748 607 609 435 483 369 777 553 1020 613 353 306 1367 569 533 506 437 493 398 447 557 504 690 707 523 489 808 1344 540 681 610 711 597 688 475 587 477 518 564 336 643 435 686 528 541 685 520 599 501 706 441 824 584 541 1106 516 613 440 534 632 381 716 342 626 663 595 602 726 707 564 442 516 428 512 548 699 601 444 1048 554 497 506 585 
552 562 542 800 574 336 1642 544 401 417 406 583 552 686 631 661 705 732 288 498 1440 866 482 781 472 387 626 336 549 549 329 407 588 872 459 308 568 659 380 905 324 745 755 909 546 559 978 943 493 375 483 586 561 505 577 376 326 562 1163 609 559 574 471 522 474 556 734 694 460 650 453 563 616 431 455 410 822 605 424 463 704 726 640 558 987 693 433 492 950 442 745 589 460 350 410 456 386 423 740 799 478 443 404 997 627 513 386 986 976 518 620 671 618 595 730 754 475 1072 481 743 468 545 457 697 1390 304 560 580 339 397 617 696 487 723 782 900 524 468 745 413 475 483 863 804 807 368 836 629 532 410 1088 397 629 686 389 811 701 307 484 875 558 761 544 494 490 618 419 509 463 631 548 561 698 585 403 315 796 784 574 878 619 366 504 657 684 447 708 619 565 435 703 520 585 625 443 776 1434 588 687 577 771 448 731 703 927 532 562 708 671 775 345 366 543 488 472 460 410 806 608 398 876 806 621 400 503 810 319 541 853 501 505 522 522 550 1095 629 677 375 482 485 712 419 665 437 470 444 495 483 672 439 373 753 623 478 473 400 624 385 567 497 477 384 458 727 559 491 416 445 574 470 521 713 467 455 411 499 602 608 387 711 998 400 419 475 435 561 572 552 1082 620 1308 612 385 668 427 491 769 425 502 978 908 516 366 634 514 612 415 648 662 582 637 2021 406 966 310 425 402 577 592 514 576 489 605 644 318 804 " + ] + }, + { + "ename": "KeyboardInterrupt", + "evalue": "", + "output_type": "error", + "traceback": [ + "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[1;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", + "File \u001b[1;32m:12\u001b[0m\n", + "File \u001b[1;32mc:\\users\\aatan\\documents\\github\\watermelonbot\\.venv\\lib\\site-packages\\pymongo\\cursor.py:1262\u001b[0m, in \u001b[0;36mCursor.next\u001b[1;34m(self)\u001b[0m\n\u001b[0;32m 1260\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m__empty:\n\u001b[0;32m 1261\u001b[0m 
\u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mStopIteration\u001b[39;00m\n\u001b[1;32m-> 1262\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m__data) \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_refresh\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m:\n\u001b[0;32m 1263\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m__data\u001b[38;5;241m.\u001b[39mpopleft()\n\u001b[0;32m 1264\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n", + "File \u001b[1;32mc:\\users\\aatan\\documents\\github\\watermelonbot\\.venv\\lib\\site-packages\\pymongo\\cursor.py:1179\u001b[0m, in \u001b[0;36mCursor._refresh\u001b[1;34m(self)\u001b[0m\n\u001b[0;32m 1157\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m InvalidOperation(\n\u001b[0;32m 1158\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mPassing a \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mhint\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m is required when using the min/max query\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 1159\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m option to ensure the query utilizes the correct index\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 1160\u001b[0m )\n\u001b[0;32m 1161\u001b[0m q \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_query_class(\n\u001b[0;32m 1162\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m__query_flags,\n\u001b[0;32m 1163\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m__collection\u001b[38;5;241m.\u001b[39mdatabase\u001b[38;5;241m.\u001b[39mname,\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 1177\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m__exhaust,\n\u001b[0;32m 1178\u001b[0m )\n\u001b[1;32m-> 1179\u001b[0m 
\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m__send_message\u001b[49m\u001b[43m(\u001b[49m\u001b[43mq\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 1180\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m__id: \u001b[38;5;66;03m# Get More\u001b[39;00m\n\u001b[0;32m 1181\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m__limit:\n", + "File \u001b[1;32mc:\\users\\aatan\\documents\\github\\watermelonbot\\.venv\\lib\\site-packages\\pymongo\\cursor.py:1060\u001b[0m, in \u001b[0;36mCursor.__send_message\u001b[1;34m(self, operation)\u001b[0m\n\u001b[0;32m 1057\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m InvalidOperation(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mexhaust cursors do not support auto encryption\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m 1059\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m-> 1060\u001b[0m response \u001b[38;5;241m=\u001b[39m \u001b[43mclient\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_run_operation\u001b[49m\u001b[43m(\u001b[49m\n\u001b[0;32m 1061\u001b[0m \u001b[43m \u001b[49m\u001b[43moperation\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_unpack_response\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43maddress\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m__address\u001b[49m\n\u001b[0;32m 1062\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 1063\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m OperationFailure \u001b[38;5;28;01mas\u001b[39;00m exc:\n\u001b[0;32m 1064\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m exc\u001b[38;5;241m.\u001b[39mcode \u001b[38;5;129;01min\u001b[39;00m _CURSOR_CLOSED_ERRORS \u001b[38;5;129;01mor\u001b[39;00m 
\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m__exhaust:\n\u001b[0;32m 1065\u001b[0m \u001b[38;5;66;03m# Don't send killCursors because the cursor is already closed.\u001b[39;00m\n", + "File \u001b[1;32mc:\\users\\aatan\\documents\\github\\watermelonbot\\.venv\\lib\\site-packages\\pymongo\\_csot.py:108\u001b[0m, in \u001b[0;36mapply..csot_wrapper\u001b[1;34m(self, *args, **kwargs)\u001b[0m\n\u001b[0;32m 106\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m _TimeoutContext(timeout):\n\u001b[0;32m 107\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m func(\u001b[38;5;28mself\u001b[39m, \u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n\u001b[1;32m--> 108\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m func(\u001b[38;5;28mself\u001b[39m, \u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n", + "File \u001b[1;32mc:\\users\\aatan\\documents\\github\\watermelonbot\\.venv\\lib\\site-packages\\pymongo\\mongo_client.py:1388\u001b[0m, in \u001b[0;36mMongoClient._run_operation\u001b[1;34m(self, operation, unpack_res, address)\u001b[0m\n\u001b[0;32m 1383\u001b[0m operation\u001b[38;5;241m.\u001b[39mreset() \u001b[38;5;66;03m# Reset op in case of retry.\u001b[39;00m\n\u001b[0;32m 1384\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m server\u001b[38;5;241m.\u001b[39mrun_operation(\n\u001b[0;32m 1385\u001b[0m conn, operation, read_preference, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_event_listeners, unpack_res\n\u001b[0;32m 1386\u001b[0m )\n\u001b[1;32m-> 1388\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_retryable_read\u001b[49m\u001b[43m(\u001b[49m\n\u001b[0;32m 1389\u001b[0m \u001b[43m \u001b[49m\u001b[43m_cmd\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 1390\u001b[0m \u001b[43m 
\u001b[49m\u001b[43moperation\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mread_preference\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 1391\u001b[0m \u001b[43m \u001b[49m\u001b[43moperation\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msession\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 1392\u001b[0m \u001b[43m \u001b[49m\u001b[43maddress\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43maddress\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 1393\u001b[0m \u001b[43m \u001b[49m\u001b[43mretryable\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43misinstance\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43moperation\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmessage\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_Query\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 1394\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[1;32mc:\\users\\aatan\\documents\\github\\watermelonbot\\.venv\\lib\\site-packages\\pymongo\\_csot.py:108\u001b[0m, in \u001b[0;36mapply..csot_wrapper\u001b[1;34m(self, *args, **kwargs)\u001b[0m\n\u001b[0;32m 106\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m _TimeoutContext(timeout):\n\u001b[0;32m 107\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m func(\u001b[38;5;28mself\u001b[39m, \u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n\u001b[1;32m--> 108\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m func(\u001b[38;5;28mself\u001b[39m, \u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n", + "File \u001b[1;32mc:\\users\\aatan\\documents\\github\\watermelonbot\\.venv\\lib\\site-packages\\pymongo\\mongo_client.py:1535\u001b[0m, in \u001b[0;36mMongoClient._retryable_read\u001b[1;34m(self, func, read_pref, session, address, retryable)\u001b[0m\n\u001b[0;32m 1533\u001b[0m \u001b[38;5;28;01massert\u001b[39;00m last_error \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m 
\u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[0;32m 1534\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m last_error\n\u001b[1;32m-> 1535\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[43msession\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mserver\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mconn\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mread_pref\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 1536\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m ServerSelectionTimeoutError:\n\u001b[0;32m 1537\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m retrying:\n\u001b[0;32m 1538\u001b[0m \u001b[38;5;66;03m# The application may think the write was never attempted\u001b[39;00m\n\u001b[0;32m 1539\u001b[0m \u001b[38;5;66;03m# if we raise ServerSelectionTimeoutError on the retry\u001b[39;00m\n\u001b[0;32m 1540\u001b[0m \u001b[38;5;66;03m# attempt. Raise the original exception instead.\u001b[39;00m\n", + "File \u001b[1;32mc:\\users\\aatan\\documents\\github\\watermelonbot\\.venv\\lib\\site-packages\\pymongo\\mongo_client.py:1384\u001b[0m, in \u001b[0;36mMongoClient._run_operation.._cmd\u001b[1;34m(session, server, conn, read_preference)\u001b[0m\n\u001b[0;32m 1377\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m_cmd\u001b[39m(\n\u001b[0;32m 1378\u001b[0m session: Optional[ClientSession],\n\u001b[0;32m 1379\u001b[0m server: Server,\n\u001b[0;32m 1380\u001b[0m conn: Connection,\n\u001b[0;32m 1381\u001b[0m read_preference: _ServerMode,\n\u001b[0;32m 1382\u001b[0m ) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m Response:\n\u001b[0;32m 1383\u001b[0m operation\u001b[38;5;241m.\u001b[39mreset() \u001b[38;5;66;03m# Reset op in case of retry.\u001b[39;00m\n\u001b[1;32m-> 1384\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mserver\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrun_operation\u001b[49m\u001b[43m(\u001b[49m\n\u001b[0;32m 1385\u001b[0m \u001b[43m 
\u001b[49m\u001b[43mconn\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43moperation\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mread_preference\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_event_listeners\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43munpack_res\u001b[49m\n\u001b[0;32m 1386\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[1;32mc:\\users\\aatan\\documents\\github\\watermelonbot\\.venv\\lib\\site-packages\\pymongo\\helpers.py:315\u001b[0m, in \u001b[0;36m_handle_reauth..inner\u001b[1;34m(*args, **kwargs)\u001b[0m\n\u001b[0;32m 312\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mpymongo\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mpool\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m Connection\n\u001b[0;32m 314\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m--> 315\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m func(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n\u001b[0;32m 316\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m OperationFailure \u001b[38;5;28;01mas\u001b[39;00m exc:\n\u001b[0;32m 317\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m no_reauth:\n", + "File \u001b[1;32mc:\\users\\aatan\\documents\\github\\watermelonbot\\.venv\\lib\\site-packages\\pymongo\\server.py:155\u001b[0m, in \u001b[0;36mServer.run_operation\u001b[1;34m(self, conn, operation, read_preference, listeners, unpack_res)\u001b[0m\n\u001b[0;32m 153\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m 154\u001b[0m conn\u001b[38;5;241m.\u001b[39msend_message(data, max_doc_size)\n\u001b[1;32m--> 155\u001b[0m reply \u001b[38;5;241m=\u001b[39m \u001b[43mconn\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mreceive_message\u001b[49m\u001b[43m(\u001b[49m\u001b[43mrequest_id\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 157\u001b[0m 
\u001b[38;5;66;03m# Unpack and check for command errors.\u001b[39;00m\n\u001b[0;32m 158\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m use_cmd:\n", + "File \u001b[1;32mc:\\users\\aatan\\documents\\github\\watermelonbot\\.venv\\lib\\site-packages\\pymongo\\pool.py:986\u001b[0m, in \u001b[0;36mConnection.receive_message\u001b[1;34m(self, request_id)\u001b[0m\n\u001b[0;32m 984\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m receive_message(\u001b[38;5;28mself\u001b[39m, request_id, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmax_message_size)\n\u001b[0;32m 985\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mBaseException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m error:\n\u001b[1;32m--> 986\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_raise_connection_failure\u001b[49m\u001b[43m(\u001b[49m\u001b[43merror\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[1;32mc:\\users\\aatan\\documents\\github\\watermelonbot\\.venv\\lib\\site-packages\\pymongo\\pool.py:984\u001b[0m, in \u001b[0;36mConnection.receive_message\u001b[1;34m(self, request_id)\u001b[0m\n\u001b[0;32m 979\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"Receive a raw BSON message or raise ConnectionFailure.\u001b[39;00m\n\u001b[0;32m 980\u001b[0m \n\u001b[0;32m 981\u001b[0m \u001b[38;5;124;03mIf any exception is raised, the socket is closed.\u001b[39;00m\n\u001b[0;32m 982\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[0;32m 983\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m--> 984\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mreceive_message\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mrequest_id\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmax_message_size\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 985\u001b[0m 
\u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mBaseException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m error:\n\u001b[0;32m 986\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_raise_connection_failure(error)\n", + "File \u001b[1;32mc:\\users\\aatan\\documents\\github\\watermelonbot\\.venv\\lib\\site-packages\\pymongo\\network.py:247\u001b[0m, in \u001b[0;36mreceive_message\u001b[1;34m(conn, request_id, max_message_size)\u001b[0m\n\u001b[0;32m 245\u001b[0m deadline \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[0;32m 246\u001b[0m \u001b[38;5;66;03m# Ignore the response's request id.\u001b[39;00m\n\u001b[1;32m--> 247\u001b[0m length, _, response_to, op_code \u001b[38;5;241m=\u001b[39m _UNPACK_HEADER(\u001b[43m_receive_data_on_socket\u001b[49m\u001b[43m(\u001b[49m\u001b[43mconn\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m16\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdeadline\u001b[49m\u001b[43m)\u001b[49m)\n\u001b[0;32m 248\u001b[0m \u001b[38;5;66;03m# No request_id for exhaust cursor \"getMore\".\u001b[39;00m\n\u001b[0;32m 249\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m request_id \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n", + "File \u001b[1;32mc:\\users\\aatan\\documents\\github\\watermelonbot\\.venv\\lib\\site-packages\\pymongo\\network.py:328\u001b[0m, in \u001b[0;36m_receive_data_on_socket\u001b[1;34m(conn, length, deadline)\u001b[0m\n\u001b[0;32m 326\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m _csot\u001b[38;5;241m.\u001b[39mget_timeout() \u001b[38;5;129;01mand\u001b[39;00m deadline \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m 327\u001b[0m conn\u001b[38;5;241m.\u001b[39mset_conn_timeout(\u001b[38;5;28mmax\u001b[39m(deadline \u001b[38;5;241m-\u001b[39m time\u001b[38;5;241m.\u001b[39mmonotonic(), 
\u001b[38;5;241m0\u001b[39m))\n\u001b[1;32m--> 328\u001b[0m chunk_length \u001b[38;5;241m=\u001b[39m \u001b[43mconn\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mconn\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrecv_into\u001b[49m\u001b[43m(\u001b[49m\u001b[43mmv\u001b[49m\u001b[43m[\u001b[49m\u001b[43mbytes_read\u001b[49m\u001b[43m:\u001b[49m\u001b[43m]\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 329\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m BLOCKING_IO_ERRORS:\n\u001b[0;32m 330\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m socket\u001b[38;5;241m.\u001b[39mtimeout(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtimed out\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n", + "File \u001b[1;32m~\\AppData\\Local\\Programs\\Python\\Python39\\lib\\ssl.py:1241\u001b[0m, in \u001b[0;36mSSLSocket.recv_into\u001b[1;34m(self, buffer, nbytes, flags)\u001b[0m\n\u001b[0;32m 1237\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m flags \u001b[38;5;241m!=\u001b[39m \u001b[38;5;241m0\u001b[39m:\n\u001b[0;32m 1238\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[0;32m 1239\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mnon-zero flags not allowed in calls to recv_into() on \u001b[39m\u001b[38;5;132;01m%s\u001b[39;00m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;241m%\u001b[39m\n\u001b[0;32m 1240\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__class__\u001b[39m)\n\u001b[1;32m-> 1241\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mread\u001b[49m\u001b[43m(\u001b[49m\u001b[43mnbytes\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mbuffer\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 1242\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m 1243\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28msuper\u001b[39m()\u001b[38;5;241m.\u001b[39mrecv_into(buffer, nbytes, flags)\n", + "File 
\u001b[1;32m~\\AppData\\Local\\Programs\\Python\\Python39\\lib\\ssl.py:1099\u001b[0m, in \u001b[0;36mSSLSocket.read\u001b[1;34m(self, len, buffer)\u001b[0m\n\u001b[0;32m 1097\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m 1098\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m buffer \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m-> 1099\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_sslobj\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mread\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mlen\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mbuffer\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 1100\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m 1101\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_sslobj\u001b[38;5;241m.\u001b[39mread(\u001b[38;5;28mlen\u001b[39m)\n", + "\u001b[1;31mKeyboardInterrupt\u001b[0m: " + ] + } + ], + "source": [ + "%%time\n", + "# Step 1: collect every distinct 'muuid'; each one keeps only its newest document (by 'date')\n", + "unique_muuids = muuid.distinct(\"muuid\")\n", + "\n", + "# Step 2: process batch_size muuids at a time, printing the deleted count of each batch\n", + "batch_size = 100\n", + "total_counts=0\n", + "for i in range(0, len(unique_muuids), batch_size):  # start at 0 so the first muuid is not skipped\n", + "    counts=0\n", + "    batch = unique_muuids[i:i+batch_size] # Get a batch of up to batch_size muuids\n", + "    for muuid1 in batch:\n", + "        # Find the latest document for this muuid; find_one gives None instead of raising StopIteration\n", + "        latest_doc = muuid.find_one({\"muuid\": muuid1}, sort=[(\"date\", -1)])\n", + "        if latest_doc is None:\n", + "            continue  # no document left for this muuid, nothing to delete\n", + "        # Delete all other documents with this muuid except the latest one\n", + "        result = muuid.delete_many({\"muuid\": muuid1, \"_id\": {\"$ne\": latest_doc[\"_id\"]}})\n", + "        counts+=result.deleted_count\n", + "        #print(f\"Deleted {result.deleted_count} old documents for muuid: {muuid1}\")\n", +
"    print(f\"{counts}\",end=\" \")\n", + "    total_counts+=counts\n", + "print(f\"\\nCleanup complete. total deleted={total_counts}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "id": "ef5810d8-07f6-4a29-9c76-3c4c5027cf8d", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(101201, 593805)" + ] + }, + "execution_count": 27, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "i,total_counts" + ] + }, { "cell_type": "markdown", "id": "491a6f11",