diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml new file mode 100644 index 000000000..abbc82607 --- /dev/null +++ b/.github/workflows/ci.yaml @@ -0,0 +1,20 @@ +name: CI + +on: + # run on pushed commits to master and on new commits on pull requests + push: + pull_request: + types: [opened, synchronize] + +jobs: + Security: + name: Security Pipeline + uses: uc-cdis/.github/.github/workflows/securitypipeline.yaml@master + with: + python-poetry: 'false' + secrets: inherit # pragma: allowlist secret + UnitTest: + name: Python Unit Test + uses: uc-cdis/.github/.github/workflows/python_unit_test.yaml@master + with: + python-version: '3.9' \ No newline at end of file diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yaml similarity index 77% rename from .github/workflows/docs.yml rename to .github/workflows/docs.yaml index 5b89a4576..e57632f58 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yaml @@ -1,8 +1,9 @@ -name: Build docs +name: Docs on: + # DON'T run on pushed commits to master, ONLY on new commits on pull requests pull_request: - branches: [ master ] + types: [opened, synchronize] jobs: build_docs: @@ -11,7 +12,8 @@ jobs: steps: - uses: actions/checkout@v2 with: - ref: ${{ github.head_ref }} + ref: ${{github.event.pull_request.head.ref}} + repository: ${{github.event.pull_request.head.repo.full_name}} - name: Set up Python 3.9 uses: actions/setup-python@v1 with: @@ -24,19 +26,20 @@ jobs: ${{ runner.os }}-poetry- - name: Install dependencies run: | - curl -sSL https://raw.githubusercontent.com/python-poetry/poetry/master/get-poetry.py | python - - source $HOME/.poetry/env + pip install poetry poetry config virtualenvs.create false - poetry install -vv + poetry install -vv --all-extras --no-interaction + poetry show -vv + # install sphinx from PyPI (as of 03/16/21 python3-sphinx is broken) # sudo apt-get install python3-sphinx pip install sphinx pip uninstall -y asyncio + pip list cd - name: Build docs run: | sphinx-build --version - source $HOME/.poetry/env export PYTHONPATH="${PYTHONPATH}:${{ env.pythonLocation }}/lib/python3.9/site-packages" cd docs poetry run make html diff --git a/.github/workflows/main.yml b/.github/workflows/lint.yaml similarity index 73% rename from .github/workflows/main.yml rename to .github/workflows/lint.yaml index 59e96ae2f..8cc810099 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/lint.yaml @@ -1,5 +1,6 @@ on: pull_request: + types: [opened, synchronize] issue_comment: types: [created, edited] @@ -11,7 +12,6 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@master - - uses: uc-cdis/wool@master env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} \ No newline at end of file diff --git a/.gitignore b/.gitignore index 3532039cc..e530d0750 100644 --- a/.gitignore +++ b/.gitignore @@ -115,3 +115,11 @@ indexing-output-manifest.csv object-manifest.csv output_manifest.csv +.dccache +.idea +# Files generated during pytest +input.csv +test_combined_discovery_metadata.tsv +test_combined_discovery_metadata_exact_match.tsv +tmp_output_file.csv +tmp_output_file_info.csv diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index b52c89394..352da81e4 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,17 +1,17 @@ repos: - repo: git@github.com:Yelp/detect-secrets - rev: v0.13.1 + rev: v1.4.0 hooks: - id: detect-secrets args: ['--baseline', '.secrets.baseline'] exclude: '(docs\/_build|poetry.lock)' - repo: 
https://github.com/pre-commit/pre-commit-hooks - rev: v2.5.0 + rev: v4.4.0 hooks: - id: no-commit-to-branch args: [--branch, develop, --branch, master, --pattern, release/.*] - repo: https://github.com/psf/black - rev: 22.3.0 + rev: 23.1.0 hooks: - id: black additional_dependencies: ['click==8.0.4'] diff --git a/.secexclude b/.secexclude deleted file mode 100644 index 8c2dbe13f..000000000 --- a/.secexclude +++ /dev/null @@ -1,52 +0,0 @@ -*.cfg -*.crt -*.css -*.cwl -*.eot -*.gif -*.gpg -*.ico -*.ini -*.ipynb -*.jpg -*.json -*.md -*.min.js -*.njk -*.png -*.rst -*.sh -*.sls -*.sql -*.svg -*.tif -*.tsv -*.ttf -*.txt -*.woff -*.woff2 -*.xml -*.yaml -*.yml -*.zip -.DS_Store -.DS_Store? -.Spotlight-V100 -.Trashes -._* -.dockerignore -.githooks/* -.github/* -.gitignore -.travis.yml -Dockerfile -Jenkinsfile -LICENSE -NOTICE -Thumbs.db -docker-compose.yml -ehthumbs.db -migrations/* -nginx.conf -test/* -tests/* \ No newline at end of file diff --git a/.secrets.baseline b/.secrets.baseline index df9ea536d..87ffb76a6 100644 --- a/.secrets.baseline +++ b/.secrets.baseline @@ -1,19 +1,18 @@ { - "exclude": { - "files": "(docs\\/_build|poetry.lock)|^.secrets.baseline$", - "lines": null - }, - "generated_at": "2022-07-12T20:56:54Z", + "version": "1.4.0", "plugins_used": [ + { + "name": "ArtifactoryDetector" + }, { "name": "AWSKeyDetector" }, { - "name": "ArtifactoryDetector" + "name": "AzureStorageKeyDetector" }, { - "base64_limit": 4.5, - "name": "Base64HighEntropyString" + "name": "Base64HighEntropyString", + "limit": 4.5 }, { "name": "BasicAuthDetector" @@ -22,8 +21,14 @@ "name": "CloudantDetector" }, { - "hex_limit": 3, - "name": "HexHighEntropyString" + "name": "DiscordBotTokenDetector" + }, + { + "name": "GitHubTokenDetector" + }, + { + "name": "HexHighEntropyString", + "limit": 3.0 }, { "name": "IbmCloudIamDetector" @@ -35,21 +40,30 @@ "name": "JwtTokenDetector" }, { - "keyword_exclude": null, - "name": "KeywordDetector" + "name": "KeywordDetector", + "keyword_exclude": "" }, { "name": "MailchimpDetector" }, + { + "name": "NpmDetector" + }, { "name": "PrivateKeyDetector" }, + { + "name": "SendGridDetector" + }, { "name": "SlackDetector" }, { "name": "SoftlayerDetector" }, + { + "name": "SquareOAuthDetector" + }, { "name": "StripeDetector" }, @@ -57,357 +71,590 @@ "name": "TwilioKeyDetector" } ], + "filters_used": [ + { + "path": "detect_secrets.filters.allowlist.is_line_allowlisted" + }, + { + "path": "detect_secrets.filters.common.is_baseline_file", + "filename": ".secrets.baseline" + }, + { + "path": "detect_secrets.filters.common.is_ignored_due_to_verification_policies", + "min_level": 2 + }, + { + "path": "detect_secrets.filters.heuristic.is_indirect_reference" + }, + { + "path": "detect_secrets.filters.heuristic.is_likely_id_string" + }, + { + "path": "detect_secrets.filters.heuristic.is_lock_file" + }, + { + "path": "detect_secrets.filters.heuristic.is_not_alphanumeric_string" + }, + { + "path": "detect_secrets.filters.heuristic.is_potential_uuid" + }, + { + "path": "detect_secrets.filters.heuristic.is_prefixed_with_dollar_sign" + }, + { + "path": "detect_secrets.filters.heuristic.is_sequential_string" + }, + { + "path": "detect_secrets.filters.heuristic.is_swagger_file" + }, + { + "path": "detect_secrets.filters.heuristic.is_templated_secret" + } + ], "results": { - "docs/howto/metadataTools.md": [ + ".travis.yml": [ { - "hashed_secret": "7c35c215b326b9463b669b657c1ff9873ff53d9a", + "type": "Base64 High Entropy String", + "filename": ".travis.yml", + "hashed_secret": 
"2b450d45e8b520a9b25727d563d1d7e036497655", "is_verified": false, - "line_number": 203, - "type": "Hex High Entropy String" + "line_number": 47 }, { + "type": "Base64 High Entropy String", + "filename": ".travis.yml", + "hashed_secret": "8dbd93a5374b178c4c142aa603c6daf3edd8413e", + "is_verified": false, + "line_number": 48 + } + ], + "docs/_build/html/.buildinfo": [ + { + "type": "Hex High Entropy String", + "filename": "docs/_build/html/.buildinfo", + "hashed_secret": "52acb5a7ffb1bfafa0e4f02cfbbfc70c59e7906d", + "is_verified": false, + "line_number": 3 + }, + { + "type": "Hex High Entropy String", + "filename": "docs/_build/html/.buildinfo", + "hashed_secret": "63256a38625700f409b7ec5db8fa05ffd2cb88b2", + "is_verified": false, + "line_number": 4 + } + ], + "docs/howto/metadataTools.md": [ + { + "type": "Hex High Entropy String", + "filename": "docs/howto/metadataTools.md", "hashed_secret": "0d515eaf06062d52e8c80abb4d3b713a65396d30", "is_verified": false, - "line_number": 208, - "type": "Hex High Entropy String" + "line_number": 189 }, { + "type": "Hex High Entropy String", + "filename": "docs/howto/metadataTools.md", "hashed_secret": "b4cff7c2af45cdfe66195ec574a7b8832f8621ea", "is_verified": false, - "line_number": 215, - "type": "Hex High Entropy String" + "line_number": 189 + }, + { + "type": "Hex High Entropy String", + "filename": "docs/howto/metadataTools.md", + "hashed_secret": "7c35c215b326b9463b669b657c1ff9873ff53d9a", + "is_verified": false, + "line_number": 202 + }, + { + "type": "Secret Keyword", + "filename": "docs/howto/metadataTools.md", + "hashed_secret": "749dcc9e92f723c96ed4a3a908ded1bc559c66a9", + "is_verified": false, + "line_number": 540 + } + ], + "docs/reference/sdkClasses.md": [ + { + "type": "Secret Keyword", + "filename": "docs/reference/sdkClasses.md", + "hashed_secret": "749dcc9e92f723c96ed4a3a908ded1bc559c66a9", + "is_verified": false, + "line_number": 58 } ], "docs/tutorial/quickStart.md": [ { + "type": "Hex High Entropy String", + "filename": "docs/tutorial/quickStart.md", "hashed_secret": "301918c8b904630da85e75ee32e9ba68ff925b73", "is_verified": false, - "line_number": 74, - "type": "Hex High Entropy String" + "line_number": 75 } ], "tests/bundle_tests/test_bundle_ingestion.py": [ { - "hashed_secret": "bbe496b7ae06cf635b221afedd43098f4172d7cb", + "type": "Hex High Entropy String", + "filename": "tests/bundle_tests/test_bundle_ingestion.py", + "hashed_secret": "e1da93616713812cb50e0ac845b1e9e305d949f1", "is_verified": false, - "line_number": 104, - "type": "Hex High Entropy String" + "line_number": 44 }, { - "hashed_secret": "968d57fa495dde77707437e3c94f8da2d4074c84", + "type": "Hex High Entropy String", + "filename": "tests/bundle_tests/test_bundle_ingestion.py", + "hashed_secret": "47f42f4c34fddab383b817e689dc0fb75af81266", "is_verified": false, - "line_number": 107, - "type": "Hex High Entropy String" + "line_number": 52 }, { - "hashed_secret": "e1da93616713812cb50e0ac845b1e9e305d949f1", + "type": "Hex High Entropy String", + "filename": "tests/bundle_tests/test_bundle_ingestion.py", + "hashed_secret": "300d95dd5d30ab6928ffda6c08c6a129a23e5b39", "is_verified": false, - "line_number": 119, - "type": "Hex High Entropy String" + "line_number": 61 }, { - "hashed_secret": "47f42f4c34fddab383b817e689dc0fb75af81266", + "type": "Hex High Entropy String", + "filename": "tests/bundle_tests/test_bundle_ingestion.py", + "hashed_secret": "bbe496b7ae06cf635b221afedd43098f4172d7cb", "is_verified": false, - "line_number": 127, - "type": "Hex High Entropy String" + 
"line_number": 93 }, { - "hashed_secret": "300d95dd5d30ab6928ffda6c08c6a129a23e5b39", + "type": "Hex High Entropy String", + "filename": "tests/bundle_tests/test_bundle_ingestion.py", + "hashed_secret": "968d57fa495dde77707437e3c94f8da2d4074c84", "is_verified": false, - "line_number": 136, - "type": "Hex High Entropy String" + "line_number": 96 } ], "tests/bundle_tests/valid_manifest.csv": [ { + "type": "Hex High Entropy String", + "filename": "tests/bundle_tests/valid_manifest.csv", "hashed_secret": "968d57fa495dde77707437e3c94f8da2d4074c84", "is_verified": false, - "line_number": 6, - "type": "Hex High Entropy String" + "line_number": 6 }, { + "type": "Hex High Entropy String", + "filename": "tests/bundle_tests/valid_manifest.csv", "hashed_secret": "bbe496b7ae06cf635b221afedd43098f4172d7cb", "is_verified": false, - "line_number": 6, - "type": "Hex High Entropy String" + "line_number": 6 } ], "tests/download_tests/resources/dataguids_commons1.json": [ { + "type": "Hex High Entropy String", + "filename": "tests/download_tests/resources/dataguids_commons1.json", "hashed_secret": "8c9a1166eab2cb85ccc7f3ab1d0c972a222c8fde", "is_verified": false, - "line_number": 16, - "type": "Hex High Entropy String" + "line_number": 16 } ], "tests/download_tests/resources/drs_object_commons3.json": [ { + "type": "Hex High Entropy String", + "filename": "tests/download_tests/resources/drs_object_commons3.json", "hashed_secret": "8c9a1166eab2cb85ccc7f3ab1d0c972a222c8fde", "is_verified": false, - "line_number": 15, - "type": "Hex High Entropy String" + "line_number": 15 } ], "tests/download_tests/resources/drs_objects.json": [ { + "type": "Hex High Entropy String", + "filename": "tests/download_tests/resources/drs_objects.json", "hashed_secret": "8c9a1166eab2cb85ccc7f3ab1d0c972a222c8fde", "is_verified": false, - "line_number": 17, - "type": "Hex High Entropy String" + "line_number": 17 }, { + "type": "Hex High Entropy String", + "filename": "tests/download_tests/resources/drs_objects.json", "hashed_secret": "08d7c960f61ee9654cce7d3e553f96db617a044d", "is_verified": false, - "line_number": 47, - "type": "Hex High Entropy String" + "line_number": 47 }, { + "type": "Hex High Entropy String", + "filename": "tests/download_tests/resources/drs_objects.json", "hashed_secret": "97a21a900edf0e7f0697664e64bca030ac07b0f7", "is_verified": false, - "line_number": 76, - "type": "Hex High Entropy String" + "line_number": 76 }, { + "type": "Hex High Entropy String", + "filename": "tests/download_tests/resources/drs_objects.json", "hashed_secret": "a8f24876ffd6cc7576b6f0f8f20580f2aabdf9b6", "is_verified": false, - "line_number": 136, - "type": "Hex High Entropy String" + "line_number": 106 }, { + "type": "Hex High Entropy String", + "filename": "tests/download_tests/resources/drs_objects.json", "hashed_secret": "c59c7c1a234691b797e9d47ef2769e4edc4a9bc6", "is_verified": false, - "line_number": 170, - "type": "Hex High Entropy String" + "line_number": 170 }, { + "type": "Hex High Entropy String", + "filename": "tests/download_tests/resources/drs_objects.json", "hashed_secret": "f755b5b416e81d4c768bca49d8951ebba3f4666d", "is_verified": false, - "line_number": 174, - "type": "Hex High Entropy String" + "line_number": 174 }, { + "type": "Hex High Entropy String", + "filename": "tests/download_tests/resources/drs_objects.json", "hashed_secret": "195760416def756cf48aabe259bc97dcad599979", "is_verified": false, - "line_number": 178, - "type": "Hex High Entropy String" + "line_number": 178 }, { - "hashed_secret": 
"d162413fa99c56914e2f4502359a135d07b6c4c2", + "type": "Hex High Entropy String", + "filename": "tests/download_tests/resources/drs_objects.json", + "hashed_secret": "1fee40bfa56e1899cf5363f57a8dc491ef8dd02e", "is_verified": false, - "line_number": 296, - "type": "Hex High Entropy String" + "line_number": 182 }, { - "hashed_secret": "1fee40bfa56e1899cf5363f57a8dc491ef8dd02e", + "type": "Hex High Entropy String", + "filename": "tests/download_tests/resources/drs_objects.json", + "hashed_secret": "d162413fa99c56914e2f4502359a135d07b6c4c2", "is_verified": false, - "line_number": 331, - "type": "Hex High Entropy String" + "line_number": 270 }, { + "type": "Hex High Entropy String", + "filename": "tests/download_tests/resources/drs_objects.json", "hashed_secret": "bcbf966945c7e3f7592a7e067382afd71dcdc107", "is_verified": false, - "line_number": 510, - "type": "Hex High Entropy String" + "line_number": 360 } ], "tests/download_tests/resources/manifest_test_1.json": [ { + "type": "Hex High Entropy String", + "filename": "tests/download_tests/resources/manifest_test_1.json", "hashed_secret": "8c9a1166eab2cb85ccc7f3ab1d0c972a222c8fde", "is_verified": false, - "line_number": 3, - "type": "Hex High Entropy String" + "line_number": 3 }, { + "type": "Hex High Entropy String", + "filename": "tests/download_tests/resources/manifest_test_1.json", "hashed_secret": "54a2e9ce63b1cc74558348c0aea7304e1a389766", "is_verified": false, - "line_number": 10, - "type": "Hex High Entropy String" + "line_number": 10 }, { + "type": "Hex High Entropy String", + "filename": "tests/download_tests/resources/manifest_test_1.json", "hashed_secret": "97a21a900edf0e7f0697664e64bca030ac07b0f7", "is_verified": false, - "line_number": 17, - "type": "Hex High Entropy String" + "line_number": 17 }, { + "type": "Hex High Entropy String", + "filename": "tests/download_tests/resources/manifest_test_1.json", "hashed_secret": "a8f24876ffd6cc7576b6f0f8f20580f2aabdf9b6", "is_verified": false, - "line_number": 24, - "type": "Hex High Entropy String" + "line_number": 24 } ], "tests/download_tests/resources/manifest_test_hostname_not_in_wts.json": [ { + "type": "Hex High Entropy String", + "filename": "tests/download_tests/resources/manifest_test_hostname_not_in_wts.json", "hashed_secret": "8c9a1166eab2cb85ccc7f3ab1d0c972a222c8fde", "is_verified": false, - "line_number": 3, - "type": "Hex High Entropy String" + "line_number": 3 } ], "tests/download_tests/resources/mds_package.json": [ { + "type": "Hex High Entropy String", + "filename": "tests/download_tests/resources/mds_package.json", "hashed_secret": "0d515eaf06062d52e8c80abb4d3b713a65396d30", "is_verified": false, - "line_number": 173, - "type": "Hex High Entropy String" + "line_number": 17 }, { + "type": "Hex High Entropy String", + "filename": "tests/download_tests/resources/mds_package.json", "hashed_secret": "b4cff7c2af45cdfe66195ec574a7b8832f8621ea", "is_verified": false, - "line_number": 181, - "type": "Hex High Entropy String" + "line_number": 25 }, { + "type": "Hex High Entropy String", + "filename": "tests/download_tests/resources/mds_package.json", "hashed_secret": "bcbf966945c7e3f7592a7e067382afd71dcdc107", "is_verified": false, - "line_number": 190, - "type": "Hex High Entropy String" + "line_number": 34 + } + ], + "tests/download_tests/test_async_download.py": [ + { + "type": "Secret Keyword", + "filename": "tests/download_tests/test_async_download.py", + "hashed_secret": "f0243b023d0551febbeac98dab37b0780328cb99", + "is_verified": false, + "line_number": 220 + } + ], + 
"tests/merge_manifests/discovery_combine/discovery.tsv": [ + { + "type": "Hex High Entropy String", + "filename": "tests/merge_manifests/discovery_combine/discovery.tsv", + "hashed_secret": "e8a4e965bc8809f251c3a87e02293a6a1cd74c97", + "is_verified": false, + "line_number": 160 + }, + { + "type": "Hex High Entropy String", + "filename": "tests/merge_manifests/discovery_combine/discovery.tsv", + "hashed_secret": "15152af7feace3f6d1b5f800c8c6d2c1db9e240e", + "is_verified": false, + "line_number": 161 + }, + { + "type": "Hex High Entropy String", + "filename": "tests/merge_manifests/discovery_combine/discovery.tsv", + "hashed_secret": "fb73d4c4b811c7e2f781b1a3dabc281fb9f6868b", + "is_verified": false, + "line_number": 162 + }, + { + "type": "Hex High Entropy String", + "filename": "tests/merge_manifests/discovery_combine/discovery.tsv", + "hashed_secret": "b6bb1128fca99567fb54d64d76630a9223f4c394", + "is_verified": false, + "line_number": 163 } ], "tests/test_auth.py": [ { + "type": "Secret Keyword", + "filename": "tests/test_auth.py", "hashed_secret": "5354c5456431f0267f85dde6eaa69f666af0afb1", "is_verified": false, - "line_number": 12, - "type": "Secret Keyword" + "line_number": 13 + }, + { + "type": "Secret Keyword", + "filename": "tests/test_auth.py", + "hashed_secret": "f82f808abc6f0b7b559449988f1900a38214090f", + "is_verified": false, + "line_number": 33 + }, + { + "type": "Secret Keyword", + "filename": "tests/test_auth.py", + "hashed_secret": "e5e9fa1ba31ecd1ae84f75caaa474f3a663f05f4", + "is_verified": false, + "line_number": 527 } ], "tests/test_data/packages_manifest_ok.tsv": [ { + "type": "Hex High Entropy String", + "filename": "tests/test_data/packages_manifest_ok.tsv", "hashed_secret": "0d515eaf06062d52e8c80abb4d3b713a65396d30", "is_verified": false, - "line_number": 6, - "type": "Hex High Entropy String" + "line_number": 3 }, { + "type": "Hex High Entropy String", + "filename": "tests/test_data/packages_manifest_ok.tsv", "hashed_secret": "b4cff7c2af45cdfe66195ec574a7b8832f8621ea", "is_verified": false, - "line_number": 6, - "type": "Hex High Entropy String" + "line_number": 3 + } + ], + "tests/test_diff.py": [ + { + "type": "Hex High Entropy String", + "filename": "tests/test_diff.py", + "hashed_secret": "f9e664db75c7f23a299b0b055c10e08d47073e93", + "is_verified": false, + "line_number": 25 + }, + { + "type": "Hex High Entropy String", + "filename": "tests/test_diff.py", + "hashed_secret": "7d35b46eef5870d91fcf4d3f43fd2790a7008161", + "is_verified": false, + "line_number": 55 + }, + { + "type": "Hex High Entropy String", + "filename": "tests/test_diff.py", + "hashed_secret": "13fa17e621a69c7e6433f4180efc7e25538ac59c", + "is_verified": false, + "line_number": 61 + }, + { + "type": "Hex High Entropy String", + "filename": "tests/test_diff.py", + "hashed_secret": "871ea9e2dc980ce7fc547cd091da1d3796bbb8aa", + "is_verified": false, + "line_number": 67 } ], "tests/test_file.py": [ { + "type": "Secret Keyword", + "filename": "tests/test_file.py", "hashed_secret": "bc5494e9e5c7bd002f295376f6130e2eced73a4a", "is_verified": false, - "line_number": 386, - "type": "Secret Keyword" + "line_number": 84 } ], "tests/test_index.py": [ { - "hashed_secret": "995018d248e5bf0c4ca4fb44c52e2eb9be4876a7", + "type": "Hex High Entropy String", + "filename": "tests/test_index.py", + "hashed_secret": "0b1691078688badae57b2078ae6b00c7eec78972", "is_verified": false, - "line_number": 62, - "type": "Hex High Entropy String" + "line_number": 28 }, { - "hashed_secret": 
"0b1691078688badae57b2078ae6b00c7eec78972", + "type": "Hex High Entropy String", + "filename": "tests/test_index.py", + "hashed_secret": "995018d248e5bf0c4ca4fb44c52e2eb9be4876a7", "is_verified": false, - "line_number": 93, - "type": "Hex High Entropy String" + "line_number": 32 }, { + "type": "Hex High Entropy String", + "filename": "tests/test_index.py", "hashed_secret": "50d0a1cf4bf47b1625d4a8e4b0afad02012eb3da", "is_verified": false, - "line_number": 97, - "type": "Hex High Entropy String" + "line_number": 36 }, { + "type": "Hex High Entropy String", + "filename": "tests/test_index.py", "hashed_secret": "98f0a724b4285da1d5dced7c16d8a546f08c297d", "is_verified": false, - "line_number": 124, - "type": "Hex High Entropy String" + "line_number": 124 }, { + "type": "Hex High Entropy String", + "filename": "tests/test_index.py", "hashed_secret": "190506946c18c0b0b2dd270e8de76ebc8d5ad672", "is_verified": false, - "line_number": 174, - "type": "Hex High Entropy String" + "line_number": 165 }, { + "type": "Hex High Entropy String", + "filename": "tests/test_index.py", "hashed_secret": "0fdd976872ec302887d854f6b1452bd8fcdf76dc", "is_verified": false, - "line_number": 226, - "type": "Hex High Entropy String" + "line_number": 219 } ], "tests/test_jobs.py": [ { + "type": "Hex High Entropy String", + "filename": "tests/test_jobs.py", "hashed_secret": "16ac780c5e695ed5f79a4300ac498e07200c9865", "is_verified": false, - "line_number": 153, - "type": "Hex High Entropy String" + "line_number": 153 } ], "tests/test_manifests.py": [ { + "type": "Hex High Entropy String", + "filename": "tests/test_manifests.py", "hashed_secret": "6487fa7aea293dfde99cda9c6ccb9161219331c7", "is_verified": false, - "line_number": 110, - "type": "Hex High Entropy String" - }, - { - "hashed_secret": "96c9184fb19c9c1618ccf44d141f8029a739891c", - "is_verified": false, - "line_number": 424, - "type": "Hex High Entropy String" + "line_number": 110 }, { + "type": "Hex High Entropy String", + "filename": "tests/test_manifests.py", "hashed_secret": "e1da93616713812cb50e0ac845b1e9e305d949f1", "is_verified": false, - "line_number": 559, - "type": "Hex High Entropy String" + "line_number": 133 }, { + "type": "Hex High Entropy String", + "filename": "tests/test_manifests.py", "hashed_secret": "47f42f4c34fddab383b817e689dc0fb75af81266", "is_verified": false, - "line_number": 583, - "type": "Hex High Entropy String" + "line_number": 141 }, { + "type": "Hex High Entropy String", + "filename": "tests/test_manifests.py", "hashed_secret": "300d95dd5d30ab6928ffda6c08c6a129a23e5b39", "is_verified": false, - "line_number": 607, - "type": "Hex High Entropy String" + "line_number": 150 }, { - "hashed_secret": "f9e664db75c7f23a299b0b055c10e08d47073e93", + "type": "Hex High Entropy String", + "filename": "tests/test_manifests.py", + "hashed_secret": "96c9184fb19c9c1618ccf44d141f8029a739891c", "is_verified": false, - "line_number": 675, - "type": "Hex High Entropy String" + "line_number": 159 }, { + "type": "Hex High Entropy String", + "filename": "tests/test_manifests.py", "hashed_secret": "7c35c215b326b9463b669b657c1ff9873ff53d9a", "is_verified": false, - "line_number": 889, - "type": "Hex High Entropy String" + "line_number": 641 + }, + { + "type": "Hex High Entropy String", + "filename": "tests/test_manifests.py", + "hashed_secret": "f9e664db75c7f23a299b0b055c10e08d47073e93", + "is_verified": false, + "line_number": 675 + }, + { + "type": "Hex High Entropy String", + "filename": "tests/test_manifests.py", + "hashed_secret": 
"14bafb268df75cc8f8d8f87f61468d249de936ba", + "is_verified": false, + "line_number": 803 + }, + { + "type": "Hex High Entropy String", + "filename": "tests/test_manifests.py", + "hashed_secret": "bce350451d95a06da0423916d228575c633008ef", + "is_verified": false, + "line_number": 811 }, { + "type": "Hex High Entropy String", + "filename": "tests/test_manifests.py", "hashed_secret": "0d515eaf06062d52e8c80abb4d3b713a65396d30", "is_verified": false, - "line_number": 893, - "type": "Hex High Entropy String" + "line_number": 893 }, { + "type": "Hex High Entropy String", + "filename": "tests/test_manifests.py", "hashed_secret": "b4cff7c2af45cdfe66195ec574a7b8832f8621ea", "is_verified": false, - "line_number": 898, - "type": "Hex High Entropy String" + "line_number": 898 } ], "tests/test_submission.py": [ { + "type": "Secret Keyword", + "filename": "tests/test_submission.py", "hashed_secret": "4d48c6376ebfec3af94c3d9a1b33f4c045e361f7", "is_verified": false, - "line_number": 164, - "type": "Secret Keyword" + "line_number": 164 } ] }, - "version": "0.13.1", - "word_list": { - "file": null, - "hash": null - } + "generated_at": "2023-10-09T18:58:28Z" } diff --git a/.travis.yml b/.travis.yml index b8eac3683..20d9257b6 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,27 +1,23 @@ +dist: jammy language: python python: -- 3.6 -- 3.7 - 3.8 - 3.9 - 3.10 jobs: allow_failures: - - python: 3.6 - - python: 3.7 - python: 3.8 - python: 3.10 before_install: - pip install --upgrade pip -- 'curl -sSL https://raw.githubusercontent.com/python-poetry/poetry/master/get-poetry.py | python -' -- source $HOME/.poetry/env +- curl -sSL https://install.python-poetry.org | python - - which poetry - poetry --version install: -- poetry install -vv -- poetry show +- poetry install -vv --all-extras --no-interaction --no-cache +- poetry show -vv script: -- poetry run pytest -vv ./tests +- echo Unit testing is now done in GH Actions, this just pushing to pypi as necessary before_deploy: - poetry config repositories.testpypi https://test.pypi.org/legacy/ - poetry config pypi-token.testpypi $PYPI_API_TOKEN @@ -50,6 +46,3 @@ env: global: - secure: jCslhsRF+cpozrC75nmYkBizmdBL/01StonnarwaWmCFBEzX22i6XklpUyROYRpvHTQ7SHJlFh+kmvptT4UmRxFCqKq/RNFq2We0rRhnfWNRsQOok5ENU5MDMgD0kl3RUeEElpVk36aQ8cUyybNyuel0ZFC1F+C5GOUKaCtiDrcX9RVu5jYbydi1W1RfsCNkmyJND8efdqBXD81BueXqP7xYxdyqz+f3du/1qIx5GR/X+jv6WE2Oum6EMq5tbiWf3qc6OnxNdNd0p/O7seBIZyvOZvFvX8Cx3PsCfSbWyeX0hcZCDIhnG5YTah944MplvVR70lOx0GvzmxHZCpxffUfliXLJCrBQy9RZbomXcXI6c1h88DMt6odYNDlZJpcV12sQypbWBr7FJb1En93FcSqpdoPKEU9eR+WzQ0/GNFL5KibZ95yQdwT31gnlnz7NLQz8ubdkrKMpZTbDM1HCAgLN326jMwYpn/RxRtXFcrxvA6sTxpsCQhWssIBRHv5sY0c5vlDkLADNAeA1s9+SOKWAKHJh5NzFby+vt59vi5QSkdWRsKwZsk01NNrBUFcJuDxsp9wTEO3NigzoYrnZXk2ews9+WRaiTmsEWa4/LSfY0afQA4ezbGVT4yY/tje2XFhsaYCT/JsJpHBgyNDadX6FaAraKw4oqsQDu76/KjU= - secure: cFv/73omvtKB7/3OYuIodzNwSLUlqJuGgtyTf0W6pN5oJSpcE9U4Ti/ub2aUABaH15e0Fdp+MxaL+j4x5/TmzGwrmpP8Ts2nueJNh1XeC2aO2S+YevRuV+8L4rwbFvZzYe/rL9hbKx9Ghiatc/fk147j4Svf/ZNWPBHgBloZaXWGR2kRxrRrjnogXceTR6K2mPUMUr0+azYDH0Zwuu3Y5HF/CT5DpxWGs657VjM6EiGsZ6UUq4sqj4XZKKjTtp/I8zPH1z6NliBDqVtASX3UsZewbiDaa1DIs4NBN5QDSR3/vQfVoouPFy7M/oIqIpvDFOWfHahA8ac3/CdlMAT64KknRodpm/s2uUxJOtrDOSBMtdREEc+yMuBPf85+s+Vk6B8upkIYnpI2d7vDaHSFBIlQk/IbaoUsRUVeacQ6b20JhPyYMrKnvG9/aW7KfYJr23SpRwjwvh6D4ejbmFtePrU/uk1Js81lCUkYHBBqDvRXW/BNMKxyXTyMlCL4om4gIlQpBmru+vpDGoHHLzO+I9axmBBKWq1HgR5nF89Nd6AMoSTzAwt+7xmMsgaDaMewZczBnLyeGiKNKATkQ66qdhqUB+6dmKpTyDCFOwRaDUOaH71f3wVamOQpoHUdCPNnIh1B6RuZaXylGyO2qtsr65bC2uZCHVw9dk3eeqL2TUU= -after_deploy: -- pip install 
gen3git -- gen3git release diff --git a/Jenkinsfile.security b/Jenkinsfile.security deleted file mode 100644 index 0a5c52179..000000000 --- a/Jenkinsfile.security +++ /dev/null @@ -1,6 +0,0 @@ -#!groovy - -@Library('cdis-jenkins-lib@master') _ - -securityPipeline { -} \ No newline at end of file diff --git a/README.md b/README.md index 6a05b6ae9..7d8eb0588 100755 --- a/README.md +++ b/README.md @@ -19,9 +19,11 @@ The docs here contain general descriptions of the different pieces of the SDK an - [Available Classes](docs/reference/sdkClasses.md) - [Indexing Tools](docs/howto/diirmIndexing.md) - [Metadata Tools](docs/howto/metadataTools.md) - - [Metadata Crosswalk Tools](docs/howto/crosswalk.md) + - [Gen3 Discovery Page Metadata Tools](docs/howto/discoveryMetadataTools.md) + - [Gen3 Subject-level Crosswalk Metadata Tools](docs/howto/crosswalk.md) - [Bundle Tools](docs/howto/bundleTools.md) - [Development](docs/howto/devTest.md) + - [CLI](docs/howto/cli.md) --- diff --git a/docs/_build/doctrees/auth.doctree b/docs/_build/doctrees/auth.doctree index d6671d05b..20f592f88 100644 Binary files a/docs/_build/doctrees/auth.doctree and b/docs/_build/doctrees/auth.doctree differ diff --git a/docs/_build/doctrees/environment.pickle b/docs/_build/doctrees/environment.pickle index 710573656..fd6a32ce4 100644 Binary files a/docs/_build/doctrees/environment.pickle and b/docs/_build/doctrees/environment.pickle differ diff --git a/docs/_build/doctrees/file.doctree b/docs/_build/doctrees/file.doctree index cf42a17bd..4c3eb53b5 100644 Binary files a/docs/_build/doctrees/file.doctree and b/docs/_build/doctrees/file.doctree differ diff --git a/docs/_build/doctrees/index.doctree b/docs/_build/doctrees/index.doctree index 22e59941f..8179ccf0b 100644 Binary files a/docs/_build/doctrees/index.doctree and b/docs/_build/doctrees/index.doctree differ diff --git a/docs/_build/doctrees/indexing.doctree b/docs/_build/doctrees/indexing.doctree index dfe66c058..380d7c7b0 100644 Binary files a/docs/_build/doctrees/indexing.doctree and b/docs/_build/doctrees/indexing.doctree differ diff --git a/docs/_build/doctrees/jobs.doctree b/docs/_build/doctrees/jobs.doctree index 378cd9b39..050b3b31d 100644 Binary files a/docs/_build/doctrees/jobs.doctree and b/docs/_build/doctrees/jobs.doctree differ diff --git a/docs/_build/doctrees/metadata.doctree b/docs/_build/doctrees/metadata.doctree index cfa721c4d..75b1f4694 100644 Binary files a/docs/_build/doctrees/metadata.doctree and b/docs/_build/doctrees/metadata.doctree differ diff --git a/docs/_build/doctrees/object.doctree b/docs/_build/doctrees/object.doctree index 4dff4cb75..8b3d30b62 100644 Binary files a/docs/_build/doctrees/object.doctree and b/docs/_build/doctrees/object.doctree differ diff --git a/docs/_build/doctrees/query.doctree b/docs/_build/doctrees/query.doctree index 0b78e4493..218c50e4c 100644 Binary files a/docs/_build/doctrees/query.doctree and b/docs/_build/doctrees/query.doctree differ diff --git a/docs/_build/doctrees/submission.doctree b/docs/_build/doctrees/submission.doctree index 132fbe96f..8ed54272a 100644 Binary files a/docs/_build/doctrees/submission.doctree and b/docs/_build/doctrees/submission.doctree differ diff --git a/docs/_build/doctrees/tools.doctree b/docs/_build/doctrees/tools.doctree index 9f6259a53..2f3e2df38 100644 Binary files a/docs/_build/doctrees/tools.doctree and b/docs/_build/doctrees/tools.doctree differ diff --git a/docs/_build/doctrees/tools/drs_pull.doctree b/docs/_build/doctrees/tools/drs_pull.doctree index 7f346d734..6b3e2107f 100644 
Binary files a/docs/_build/doctrees/tools/drs_pull.doctree and b/docs/_build/doctrees/tools/drs_pull.doctree differ diff --git a/docs/_build/doctrees/tools/indexing.doctree b/docs/_build/doctrees/tools/indexing.doctree index b73838368..1984d47ce 100644 Binary files a/docs/_build/doctrees/tools/indexing.doctree and b/docs/_build/doctrees/tools/indexing.doctree differ diff --git a/docs/_build/doctrees/tools/metadata.doctree b/docs/_build/doctrees/tools/metadata.doctree index 24f8e699b..3b74e55ad 100644 Binary files a/docs/_build/doctrees/tools/metadata.doctree and b/docs/_build/doctrees/tools/metadata.doctree differ diff --git a/docs/_build/doctrees/wss.doctree b/docs/_build/doctrees/wss.doctree index 0964a0c68..81babe9ab 100644 Binary files a/docs/_build/doctrees/wss.doctree and b/docs/_build/doctrees/wss.doctree differ diff --git a/docs/_build/html/.buildinfo b/docs/_build/html/.buildinfo index cfb0b13d7..c1d984323 100644 --- a/docs/_build/html/.buildinfo +++ b/docs/_build/html/.buildinfo @@ -1,4 +1,4 @@ # Sphinx build info version 1 # This file hashes the configuration used when building these files. When it is not found, a full rebuild will be done. -config: c204aa2869f9bc5f949a2abce63f0f44 +config: 5f84980db6ce51f04ab25ae1f011fb32 tags: 645f666f9bcd5a90fca523b33c5a78b7 diff --git a/docs/_build/html/_modules/gen3/auth.html b/docs/_build/html/_modules/gen3/auth.html index 1ee5bc0ac..221a1a0d2 100644 --- a/docs/_build/html/_modules/gen3/auth.html +++ b/docs/_build/html/_modules/gen3/auth.html @@ -1,18 +1,15 @@ - - +
def decode_token(token_str):
- """
+ """
jq -r '.api_key' < ~/.gen3/qa-covid19.planx-pla.net.json | awk -F . '{ print $2 }' | base64 --decode | jq -r .
"""
tokenParts = token_str.split(".")
@@ -72,7 +69,7 @@ Source code for gen3.auth
def endpoint_from_token(token_str):
- """
+ """
Extract the endpoint from a JWT issuer ("iss" property)
"""
info = decode_token(token_str)
@@ -80,11 +77,11 @@ Source code for gen3.auth
endpoint = urlparts.scheme + "://" + urlparts.hostname
if urlparts.port:
endpoint += ":" + str(urlparts.port)
- return endpoint
+ return remove_trailing_whitespace_and_slashes_in_url(endpoint)
def _handle_access_token_response(resp, token_key):
- """
+ """
Shared helper for both get_access_token_with_key and get_access_token_from_wts
"""
err_msg = "Failed to get an access token from {}:\n{}"
@@ -100,7 +97,7 @@ Source code for gen3.auth
def get_access_token_with_key(api_key):
- """
+ """
Try to fetch an access token given the api key
"""
endpoint = endpoint_from_token(api_key["api_key"])
@@ -111,6 +108,22 @@ Source code for gen3.auth
return _handle_access_token_response(resp, token_key)
+def get_access_token_with_client_credentials(endpoint, client_credentials, scopes):
+ """
+ Try to get an access token from Fence using client credentials
+
+ Args:
+ endpoint (str): URL of the Gen3 instance to get an access token for
+ client_credentials ((str, str) tuple): (client ID, client secret) tuple
+ scopes (str): space-delimited list of scopes to request
+ """
+ if not endpoint:
+ raise ValueError("'endpoint' must be specified when using client credentials")
+ url = f"{endpoint}/user/oauth2/token?grant_type=client_credentials&scope={scopes}"
+ resp = requests.post(url, auth=client_credentials)
+ return _handle_access_token_response(resp, "access_token")
+
+
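For illustration, a minimal sketch of calling this new helper directly; the endpoint, client ID, and environment variable name are hypothetical, and client secrets should never be hardcoded:

```python
import os

# Hypothetical values for illustration only.
token = get_access_token_with_client_credentials(
    endpoint="https://datacommons.example",
    client_credentials=("my-client-id", os.environ["GEN3_OIDC_CLIENT_CREDS_SECRET"]),
    scopes="user data openid",  # the default scopes used by Gen3Auth
)
```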
def get_wts_endpoint(namespace=os.getenv("NAMESPACE", "default")):
return "http://workspace-token-service.{}.svc.cluster.local".format(namespace)
@@ -128,7 +141,7 @@ Source code for gen3.auth
def get_token_cache_file_name(key):
- """Compute the path to the access-token cache file"""
+ """Compute the path to the access-token cache file"""
cache_folder = "{}/.cache/gen3/".format(os.path.expanduser("~"))
if not os.path.isdir(cache_folder):
os.makedirs(cache_folder)
@@ -139,8 +152,10 @@ Source code for gen3.auth
return cache_prefix + m.hexdigest()
-[docs]class Gen3Auth(AuthBase):
- """Gen3 auth helper class for use with requests auth.
+
+[docs]
+class Gen3Auth(AuthBase):
+ """Gen3 auth helper class for use with requests auth.
Implements requests.auth.AuthBase in order to support JWT authentication.
Generates access tokens from the provided refresh token file or string.
@@ -155,6 +170,10 @@ Source code for gen3.auth
refresh_token (str, opt): The JSON web token. Optional if working in a Gen3 Workspace.
idp (str, opt): If working in a Gen3 Workspace, the IDP to use can be specified -
"local" indicates the local environment fence idp
+ client_credentials (tuple, opt): The (client_id, client_secret) credentials for an OIDC client
+ that has the 'client_credentials' grant, allowing it to obtain access tokens.
+ client_scopes (str, opt): Space-separated list of scopes requested for access tokens obtained from client
+ credentials. Default: "user data openid"
Examples:
This generates the Gen3Auth class pointed at the sandbox commons while
@@ -175,13 +194,33 @@ Source code for gen3.auth
than pass the refresh_file argument to the Gen3Auth
constructor.
+ If working with an OIDC client that has the 'client_credentials' grant, allowing it to obtain
+ access tokens, provide the client ID and secret:
+
+ Note: client secrets should never be hardcoded!
+
+ >>> auth = Gen3Auth(
+ endpoint="https://datacommons.example",
+ client_credentials=("client ID", os.environ["GEN3_OIDC_CLIENT_CREDS_SECRET"])
+ )
+
If working in a Gen3 Workspace, initialize as follows:
>>> auth = Gen3Auth()
"""
- def __init__(self, endpoint=None, refresh_file=None, refresh_token=None, idp=None):
- # note - this is not actually a JWT refresh token - it's a
+ def __init__(
+ self,
+ endpoint=None,
+ refresh_file=None,
+ refresh_token=None,
+ idp=None,
+ client_credentials=None,
+ client_scopes=None,
+ ):
+ logging.debug("Initializing auth..")
+ self.endpoint = remove_trailing_whitespace_and_slashes_in_url(endpoint)
+ # note - `_refresh_token` is not actually a JWT refresh token - it's a
# gen3 api key with a token as the "api_key" property
self._refresh_token = refresh_token
self._access_token = None
@@ -191,6 +230,13 @@ Source code for gen3.auth
self._use_wts = False
self._external_wts_host = None
self._refresh_file = refresh_file
+ self._client_credentials = client_credentials
+ if self._client_credentials:
+ self._client_scopes = client_scopes or "user data openid"
+ elif client_scopes:
+ raise ValueError(
+ "'client_scopes' cannot be specified without 'client_credentials'"
+ )
if refresh_file and refresh_token:
raise ValueError(
@@ -227,6 +273,15 @@ Source code for gen3.auth
logging.warning("Unable to find refresh_file")
refresh_file = None
+ if self._client_credentials:
+ if not endpoint:
+ raise ValueError(
+                "'endpoint' must be specified when 'client_credentials' is specified"
+ )
+ self._access_token = get_access_token_with_client_credentials(
+ endpoint, self._client_credentials, self._client_scopes
+ )
+
if not self._access_token:
# at this point - refresh_file either exists or is None
if not refresh_file and not refresh_token:
@@ -254,32 +309,35 @@ Source code for gen3.auth
if idp or (
endpoint
and (
- not endpoint
+ not endpoint.rstrip("/")
== endpoint_from_token(self._refresh_token["api_key"])
)
):
try:
+ logging.debug(
+ "Switch to using WTS and set external WTS host url.."
+ )
self._use_wts = True
self._external_wts_host = (
endpoint_from_token(self._refresh_token["api_key"])
+ "/wts/"
)
- self.get_access_token(
- remove_trailing_whitespace_and_slashes_in_url(endpoint)
- )
+ self.get_access_token()
except Gen3AuthError as g:
logging.warning(
"Could not obtain access token from WTS service."
)
raise g
- if self._access_token:
- self.endpoint = endpoint_from_token(self._access_token)
- else:
- self.endpoint = endpoint_from_token(self._refresh_token["api_key"])
+
+ if not self.endpoint:
+ if self._access_token:
+ self.endpoint = endpoint_from_token(self._access_token)
+ else:
+ self.endpoint = endpoint_from_token(self._refresh_token["api_key"])
@property
def _token_info(self):
- """
+ """
Wrapper to fix intermittent errors when the token is being refreshed
and `_access_token_info` == None
"""
@@ -288,7 +346,7 @@ Source code for gen3.auth
return self._access_token_info
def __call__(self, request):
- """Adds authorization header to the request
+ """Adds authorization header to the request
This gets called by the python.requests package on outbound requests
so that authentication can be added.
@@ -302,7 +360,7 @@ Source code for gen3.auth
return request
def _handle_401(self, response, **kwargs):
- """Handles failed requests when authorization failed.
+ """Handles failed requests when authorization failed.
This gets called after a failed request when an HTTP 401 error
occurs. This then tries to refresh the access token in the event
@@ -330,28 +388,38 @@ Source code for gen3.auth
return _response
-[docs] def refresh_access_token(self, endpoint=None):
- """Get a new access token"""
+
+[docs]
+ def refresh_access_token(self, endpoint=None):
+ """Get a new access token"""
if self._use_wts:
self._access_token = self.get_access_token_from_wts(endpoint)
+ elif self._client_credentials:
+ self._access_token = get_access_token_with_client_credentials(
+ endpoint, self._client_credentials, self._client_scopes
+ )
else:
self._access_token = get_access_token_with_key(self._refresh_token)
+
self._access_token_info = decode_token(self._access_token)
+
+ cache_file = None
if self._use_wts:
cache_file = get_token_cache_file_name(self._wts_idp)
- else:
- if self._refresh_file:
- cache_file = get_token_cache_file_name(self._refresh_token["api_key"])
-
- try:
- self._write_to_file(cache_file, self._access_token)
- except Exception as e:
- logging.warning(
- f"Unable to write access token to cache file. Exceeded number of retries."
- )
+ elif self._refresh_file:
+ cache_file = get_token_cache_file_name(self._refresh_token["api_key"])
+
+ if cache_file:
+ try:
+ self._write_to_file(cache_file, self._access_token)
+ except Exception as e:
+ logging.warning(
+ f"Unable to write access token to cache file. Exceeded number of retries. Details: {e}"
+ )
return self._access_token
+
@backoff.on_exception(
wait_gen=backoff.expo, exception=Exception, **DEFAULT_BACKOFF_SETTINGS
)
@@ -371,8 +439,10 @@ Source code for gen3.auth
logging.warning(str(e))
raise e
-[docs] def get_access_token(self, endpoint=None):
- """Get the access token - auto refresh if within 5 minutes of expiration"""
+
+[docs]
+ def get_access_token(self):
+ """Get the access token - auto refresh if within 5 minutes of expiration"""
if not self._access_token:
if self._use_wts:
cache_file = get_token_cache_file_name(self._wts_idp)
@@ -398,12 +468,15 @@ Source code for gen3.auth
or time.time() + 300 > self._access_token_info["exp"]
)
if need_new_token:
- return self.refresh_access_token(endpoint)
+ return self.refresh_access_token(
+ self.endpoint if hasattr(self, "endpoint") else None
+ )
# use cache
return self._access_token
+
def _get_auth_value(self):
- """Returns the Authorization header value for the request
+ """Returns the Authorization header value for the request
This gets called when added the Authorization header to the request.
This fetches the access token from the refresh token if the access token is missing.
@@ -411,8 +484,10 @@ Source code for gen3.auth
"""
return "bearer " + self.get_access_token()
-[docs] def curl(self, path, request=None, data=None):
- """
+
+[docs]
+ def curl(self, path, request=None, data=None):
+ """
Curl the given endpoint - ex: gen3 curl /user/user. Return requests.Response
Args:
@@ -443,29 +518,81 @@ Source code for gen3.auth
raise Exception("Invalid request type: " + request)
return output
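A short usage sketch for curl(), reusing the auth object from the earlier example:

```python
# GET /user/user and print the response; curl() returns a requests.Response.
resp = auth.curl("/user/user")
print(resp.status_code, resp.text)
```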
-[docs] def get_access_token_from_wts(self, endpoint=None):
- """
+
+
+[docs]
+ def get_access_token_from_wts(self, endpoint=None):
+ """
Try to fetch an access token for the given idp from the wts
in the given namespace. If idp is not set, then default to "local"
"""
# attempt to get a token from the workspace-token-service
+ logging.debug("getting access token from wts..")
auth_url = get_wts_endpoint(self._wts_namespace) + "/token/"
- if self._wts_idp and self._wts_idp != "local":
- auth_url += "?idp={}".format(self._wts_idp)
- try:
- resp = requests.get(auth_url)
- if (resp and resp.status_code == 200) or (not self._external_wts_host):
- return _handle_access_token_response(resp, "token")
- except Exception as e:
- if not self._external_wts_host:
- raise e
- else:
- # Try to obtain token from external wts
- pass
+        # If a non-"local" idp value exists, append it to the auth url
+        # If the user specified an endpoint value, first attempt to determine the idp value.
+ if self.endpoint or (self._wts_idp and self._wts_idp != "local"):
+ # If user supplied endpoint value and not idp, figure out the idp value
+ if self.endpoint:
+ logging.debug(
+ "First try to use the local WTS to figure out idp name for the supplied endpoint.."
+ )
+ try:
+ provider_List = get_wts_idps(self._wts_namespace)
+ matchProviders = list(
+ filter(
+ lambda provider: provider["base_url"] == endpoint,
+ provider_List["providers"],
+ )
+ )
+ if len(matchProviders) == 1:
+ logging.debug("Found matching idp from local WTS.")
+ self._wts_idp = matchProviders[0]["idp"]
+ elif len(matchProviders) > 1:
+ raise ValueError(
+ "Multiple idps matched with endpoint value provided."
+ )
+ else:
+ logging.debug("Could not find matching idp from local WTS.")
+ except Exception as e:
+                    logging.debug(
+                        "Exception occurred when making network call to local WTS."
+                    )
+ if not self._external_wts_host:
+ raise e
+ else:
+ logging.debug("Since external WTS host exists, continuing on..")
+ pass
+
+ if self._wts_idp and self._wts_idp != "local":
+ auth_url += "?idp={}".format(self._wts_idp)
+
+ # If endpoint value exists, only get WTS token if idp value has been successfully determined
+ # Otherwise skip to querying external WTS
+        # This is to prevent the local WTS from supplying an incorrect token to the user
+ if (
+ not self._external_wts_host
+ or not self.endpoint
+ or (self.endpoint and self._wts_idp != "local")
+ ):
+ try:
+ logging.debug("Try to get access token from local WTS..")
+ logging.debug(f"{auth_url=}")
+ resp = requests.get(auth_url)
+ if (resp and resp.status_code == 200) or (not self._external_wts_host):
+ return _handle_access_token_response(resp, "token")
+ except Exception as e:
+ if not self._external_wts_host:
+ raise e
+ else:
+ # Try to obtain token from external wts
+                    logging.debug("Could not obtain token from local WTS.")
+ pass
# local workspace wts call failed, try using a network call
# First get access token with WTS host
+ logging.debug("Trying to get access token from external WTS Host..")
wts_token = get_access_token_with_key(self._refresh_token)
auth_url = self._external_wts_host + "token/"
@@ -486,9 +613,14 @@ Source code for gen3.auth
provider_List["providers"],
)
)
+ else:
+ raise Exception(
+ "Unable to generate matching identity providers (no IdP or endpoint provided)"
+ )
if len(matchProviders) == 1:
self._wts_idp = matchProviders[0]["idp"]
+        logging.debug("Successfully determined idp value: {}".format(self._wts_idp))
else:
idp_list = "\n "
@@ -525,14 +657,16 @@ Source code for gen3.auth
+ idp_list
+ "Query /wts/external_oidc/ for more information."
)
-
+ logging.debug("Finally getting access token..")
auth_url += "?idp={}".format(self._wts_idp)
header = {"Authorization": "Bearer " + wts_token}
resp = requests.get(auth_url, headers=header)
err_msg = "Please make sure the target commons is connected on your profile page and that connection has not expired."
if resp.status_code != 200:
logging.warning(err_msg)
- return _handle_access_token_response(resp, "token")
+ return _handle_access_token_response(resp, "token")
+
+
diff --git a/docs/_build/html/_modules/gen3/file.html b/docs/_build/html/_modules/gen3/file.html
index 315807954..5189947bd 100644
--- a/docs/_build/html/_modules/gen3/file.html
+++ b/docs/_build/html/_modules/gen3/file.html
@@ -1,18 +1,15 @@
 gen3.file — Gen3 SDK documentation
@@ -34,14 +31,33 @@
Source code for gen3.file
import json
import requests
+import asyncio
+import aiohttp
+import aiofiles
+import time
+from tqdm import tqdm
+from types import SimpleNamespace as Namespace
+import os
+from pathlib import Path
+
+from cdislogging import get_logger
+
+from gen3.index import Gen3Index
+from gen3.utils import DEFAULT_BACKOFF_SETTINGS, raise_for_status_and_print_error
+from urllib.parse import parse_qsl, urlencode, urlparse, urlunparse
+
+logging = get_logger("__name__")
-class Gen3FileError(Exception):
- pass
+MAX_RETRIES = 3
-[docs]class Gen3File:
- """For interacting with Gen3 file management features.
+
+[docs]
+class Gen3File:
+ """For interacting with Gen3 file management features.
A class for interacting with the Gen3 file download services.
Supports getting presigned urls right now.
@@ -62,9 +78,12 @@ Source code for gen3.file
# auth_provider legacy interface required endpoint as 1st arg
self._auth_provider = auth_provider or endpoint
self._endpoint = self._auth_provider.endpoint
+ self.unsuccessful_downloads = []
-[docs] def get_presigned_url(self, guid, protocol=None):
- """Generates a presigned URL for a file.
+
+[docs]
+ def get_presigned_url(self, guid, protocol=None):
+ """Generates a presigned URL for a file.
Retrieves a presigned url for a file giving access to a file for a limited time.
@@ -80,16 +99,19 @@ Source code for gen3.file
api_url = "{}/user/data/download/{}".format(self._endpoint, guid)
if protocol:
api_url += "?protocol={}".format(protocol)
- output = requests.get(api_url, auth=self._auth_provider).text
+ resp = requests.get(api_url, auth=self._auth_provider)
+ raise_for_status_and_print_error(resp)
try:
- data = json.loads(output)
+ return resp.json()
except:
- return output
- return data
+ return resp.text
+
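A minimal sketch of requesting a presigned download URL; the GUID and credentials file are hypothetical:

```python
from gen3.auth import Gen3Auth
from gen3.file import Gen3File

file_client = Gen3File(auth_provider=Gen3Auth(refresh_file="credentials.json"))
presigned = file_client.get_presigned_url(
    "dg.1234/11111111-2222-3333-4444-555555555555"  # hypothetical GUID
)
print(presigned["url"])
```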
-[docs] def delete_file(self, guid):
- """
+
+[docs]
+ def delete_file(self, guid):
+ """
This method is DEPRECATED. Use delete_file_locations() instead.
Delete all locations of a stored data file and remove its record from indexd
@@ -104,8 +126,11 @@ Source code for gen3.file
return output
-[docs] def delete_file_locations(self, guid):
- """
+
+
+[docs]
+ def delete_file_locations(self, guid):
+ """
Delete all locations of a stored data file and remove its record from indexd
Args:
@@ -118,8 +143,13 @@ Source code for gen3.file
return output
-[docs] def upload_file(self, file_name, authz=None, protocol=None, expires_in=None):
- """
+
+
+[docs]
+ def upload_file(
+ self, file_name, authz=None, protocol=None, expires_in=None, bucket=None
+ ):
+ """
Get a presigned url for a file to upload
Args:
@@ -130,6 +160,9 @@ Source code for gen3.file
expires_in (int): Amount in seconds that the signed url will expire from datetime.utcnow().
Be sure to use a positive integer.
This value will also be treated as <= MAX_PRESIGNED_URL_TTL in the fence configuration.
+ bucket (str): Bucket to upload to. The bucket must be configured in the Fence instance's
+ `ALLOWED_DATA_UPLOAD_BUCKETS` setting. If not specified, Fence defaults to the
+ `DATA_UPLOAD_BUCKET` setting.
Returns:
Document: json representation for the file upload
"""
@@ -143,17 +176,141 @@ Source code for gen3.file
body["expires_in"] = expires_in
if file_name:
body["file_name"] = file_name
+ if bucket:
+ body["bucket"] = bucket
headers = {"Content-Type": "application/json"}
- output = requests.post(
+ resp = requests.post(
api_url, auth=self._auth_provider, json=body, headers=headers
- ).text
+ )
+ raise_for_status_and_print_error(resp)
try:
- data = json.loads(output)
+ data = json.loads(resp.text)
except:
- return output
+ return resp.text
+
+ return data
+
+
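A sketch of the new bucket parameter in use; the bucket name and authz resource are hypothetical, and the bucket must appear in Fence's ALLOWED_DATA_UPLOAD_BUCKETS setting:

```python
# Request a presigned upload URL targeting a specific, pre-configured bucket.
doc = file_client.upload_file(
    "my_data.csv",
    authz=["/programs/example"],  # hypothetical authz resource
    expires_in=3600,
    bucket="example-upload-bucket",
)
print(doc)
```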
+ def _ensure_dirpath_exists(path: Path) -> Path:
+ """Utility to create a directory if missing.
+ Returns the path so that the call can be inlined in another call
+ Args:
+ path (Path): path to create
+ Returns
+ path of created directory
+ """
+ assert path
+ out_path: Path = path
+
+ if not out_path.exists():
+ out_path.mkdir(parents=True, exist_ok=True)
+
+ return out_path
+
+
+[docs]
+ def download_single(self, object_id, path):
+ """
+ Download a single file using its GUID.
+
+ Args:
+ object_id (str): The file's unique ID
+ path (str): Path to store the downloaded file at
+ """
+ try:
+ url = self.get_presigned_url(object_id)
+ except Exception as e:
+ logging.critical(f"Unable to get a presigned URL for download: {e}")
+ return False
+
+ response = requests.get(url["url"], stream=True)
+ if response.status_code != 200:
+ logging.error(f"Response code: {response.status_code}")
+ if response.status_code >= 500:
+ for _ in range(MAX_RETRIES):
+ logging.info("Retrying now...")
+ # NOTE could be updated with exponential backoff
+ time.sleep(1)
+ response = requests.get(url["url"], stream=True)
+                    if response.status_code == 200:
+                        break
+                if response.status_code != 200:
+ logging.critical("Response status not 200, try again later")
+ return False
+ else:
+ return False
+
+ response.raise_for_status()
+
+ total_size_in_bytes = int(response.headers.get("content-length"))
+ total_downloaded = 0
+
+ index = Gen3Index(self._auth_provider)
+ record = index.get_record(object_id)
+
+ filename = record["file_name"]
+
+ out_path = Gen3File._ensure_dirpath_exists(Path(path))
+
+ with open(os.path.join(out_path, filename), "wb") as f:
+ for data in response.iter_content(4096):
+ total_downloaded += len(data)
+ f.write(data)
+
+ if total_size_in_bytes == total_downloaded:
+ logging.info(f"File {filename} downloaded successfully")
+
+ else:
+ logging.error(f"File {filename} not downloaded successfully")
+ return False
+
+ return True
+
+
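And a sketch of downloading a single file by GUID into a local directory (values hypothetical):

```python
ok = file_client.download_single(
    "dg.1234/11111111-2222-3333-4444-555555555555", "downloads"
)
if not ok:
    print("Download failed; check the log output for details.")
```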
+
+[docs]
+ def upload_file_to_guid(
+ self, guid, file_name, protocol=None, expires_in=None, bucket=None
+ ):
+ """
+ Get a presigned url for a file to upload to the specified existing GUID
+
+ Args:
+            guid (str): GUID of the existing indexd record to upload to
+            file_name (str): file_name to use for upload
+ protocol (str): Storage protocol to use for upload: "s3", "az".
+ If this isn't set, the default will be "s3"
+ expires_in (int): Amount in seconds that the signed url will expire from datetime.utcnow().
+ Be sure to use a positive integer.
+ This value will also be treated as <= MAX_PRESIGNED_URL_TTL in the fence configuration.
+ bucket (str): Bucket to upload to. The bucket must be configured in the Fence instance's
+ `ALLOWED_DATA_UPLOAD_BUCKETS` setting. If not specified, Fence defaults to the
+ `DATA_UPLOAD_BUCKET` setting.
+ Returns:
+ Document: json representation for the file upload
+ """
+ url = f"{self._endpoint}/user/data/upload/{guid}"
+ params = {}
+ if protocol:
+ params["protocol"] = protocol
+ if expires_in:
+ params["expires_in"] = expires_in
+ if file_name:
+ params["file_name"] = file_name
+ if bucket:
+ params["bucket"] = bucket
+
+ url_parts = list(urlparse(url))
+ query = dict(parse_qsl(url_parts[4]))
+ query.update(params)
+ url_parts[4] = urlencode(query)
+ url = urlunparse(url_parts)
+
+ resp = requests.get(url, auth=self._auth_provider)
+ raise_for_status_and_print_error(resp)
+ return resp.json()
+
- return data
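A sketch of the companion upload_file_to_guid() method against an existing indexd record (GUID and bucket hypothetical):

```python
doc = file_client.upload_file_to_guid(
    "dg.1234/11111111-2222-3333-4444-555555555555",
    file_name="my_data.csv",
    bucket="example-upload-bucket",
)
print(doc)
```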
diff --git a/docs/_build/html/_modules/gen3/index.html b/docs/_build/html/_modules/gen3/index.html
index 60fda2cae..7c4d5da98 100644
--- a/docs/_build/html/_modules/gen3/index.html
+++ b/docs/_build/html/_modules/gen3/index.html
@@ -1,18 +1,15 @@
 gen3.index — Gen3 SDK documentation
@@ -48,8 +45,10 @@ Source code for gen3.index
logging = get_logger("__name__")
-[docs]class Gen3Index:
- """
+
+[docs]
+class Gen3Index:
+ """
A class for interacting with the Gen3 Index services.
@@ -85,8 +84,10 @@ Source code for gen3.index
self.client = client.IndexClient(endpoint, auth=auth_provider)
### Get Requests
-[docs] def is_healthy(self):
- """
+
+[docs]
+ def is_healthy(self):
+ """
Return if indexd is healthy or not
@@ -98,9 +99,12 @@ Source code for gen3.index
return False
return response.text == "Healthy"
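A quick sketch of constructing the index client; the credentials file is hypothetical:

```python
from gen3.auth import Gen3Auth
from gen3.index import Gen3Index

index = Gen3Index(Gen3Auth(refresh_file="credentials.json"))
print(index.is_healthy())  # True if indexd reports "Healthy"
```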
-[docs] @backoff.on_exception(backoff.expo, Exception, **DEFAULT_BACKOFF_SETTINGS)
+
+
+[docs]
+ @backoff.on_exception(backoff.expo, Exception, **DEFAULT_BACKOFF_SETTINGS)
def get_version(self):
- """
+ """
Return the version of indexd
@@ -109,9 +113,12 @@ Source code for gen3.index
raise_for_status_and_print_error(response)
return response.json()
-[docs] @backoff.on_exception(backoff.expo, Exception, **DEFAULT_BACKOFF_SETTINGS)
+
+
+[docs]
+ @backoff.on_exception(backoff.expo, Exception, **DEFAULT_BACKOFF_SETTINGS)
def get_stats(self):
- """
+ """
Return basic info about the records in indexd
@@ -120,9 +127,12 @@ Source code for gen3.index
raise_for_status_and_print_error(response)
return response.json()
-[docs] @backoff.on_exception(backoff.expo, Exception, **DEFAULT_BACKOFF_SETTINGS)
+
+
+[docs]
+ @backoff.on_exception(backoff.expo, Exception, **DEFAULT_BACKOFF_SETTINGS)
def get_all_records(self, limit=None, paginate=False, start=None):
- """
+ """
Get a list of all records
@@ -165,9 +175,12 @@ Source code for gen3.index
return all_records
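For example, paging through all index records 1,000 at a time, reusing the index client above:

```python
records = index.get_all_records(limit=1000, paginate=True)
print(f"retrieved {len(records)} records")
```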
-[docs] @backoff.on_exception(backoff.expo, Exception, **DEFAULT_BACKOFF_SETTINGS)
+
+
+[docs]
+ @backoff.on_exception(backoff.expo, Exception, **DEFAULT_BACKOFF_SETTINGS)
def get_records_on_page(self, limit=None, page=None):
- """
+ """
Get a list of all records given the page and page size limit
@@ -188,9 +201,12 @@ Source code for gen3.index
return response.json().get("records")
-[docs] @backoff.on_exception(backoff.expo, Exception, **DEFAULT_BACKOFF_SETTINGS)
+
+
+[docs]
+ @backoff.on_exception(backoff.expo, Exception, **DEFAULT_BACKOFF_SETTINGS)
async def async_get_record(self, guid=None, _ssl=None):
- """
+ """
Asynchronous function to request a record from indexd.
Args:
@@ -207,9 +223,12 @@ Source code for gen3.index
return response
-[docs] @backoff.on_exception(backoff.expo, Exception, **DEFAULT_BACKOFF_SETTINGS)
+
+
+[docs]
+ @backoff.on_exception(backoff.expo, Exception, **DEFAULT_BACKOFF_SETTINGS)
async def async_get_records_on_page(self, limit=None, page=None, _ssl=None):
- """
+ """
Asynchronous function to request a page from indexd.
Args:
@@ -236,11 +255,14 @@ Source code for gen3.index
return response.get("records")
-[docs] @backoff.on_exception(backoff.expo, Exception, **DEFAULT_BACKOFF_SETTINGS)
+
+
+[docs]
+ @backoff.on_exception(backoff.expo, Exception, **DEFAULT_BACKOFF_SETTINGS)
async def async_get_records_from_checksum(
self, checksum, checksum_type="md5", _ssl=None
):
- """
+ """
Asynchronous function to request records from indexd matching checksum.
Args:
@@ -264,9 +286,12 @@ Source code for gen3.index
return response.get("records")
-[docs] @backoff.on_exception(backoff.expo, Exception, **DEFAULT_BACKOFF_SETTINGS)
+
+
+[docs]
+ @backoff.on_exception(backoff.expo, Exception, **DEFAULT_BACKOFF_SETTINGS)
def get(self, guid, dist_resolution=True):
- """
+ """
Get the metadata associated with the given id, alias, or
distributed identifier
@@ -285,9 +310,12 @@ Source code for gen3.index
return rec.to_json()
-[docs] @backoff.on_exception(backoff.expo, Exception, **DEFAULT_BACKOFF_SETTINGS)
+
+
+[docs]
+ @backoff.on_exception(backoff.expo, Exception, **DEFAULT_BACKOFF_SETTINGS)
def get_urls(self, size=None, hashes=None, guids=None):
- """
+ """
Get a list of urls that match query params
@@ -306,9 +334,12 @@ Source code for gen3.index
urls = self.client._get("urls", params=p).json()
return [url for _, url in urls.items()]
-[docs] @backoff.on_exception(backoff.expo, Exception, **DEFAULT_BACKOFF_SETTINGS)
+
+
+[docs]
+ @backoff.on_exception(backoff.expo, Exception, **DEFAULT_BACKOFF_SETTINGS)
def get_record(self, guid):
- """
+ """
Get the metadata associated with a given id
@@ -320,18 +351,24 @@ Source code for gen3.index
return rec.to_json()
-[docs] @backoff.on_exception(backoff.expo, Exception, **DEFAULT_BACKOFF_SETTINGS)
+
+
+[docs]
+ @backoff.on_exception(backoff.expo, Exception, **DEFAULT_BACKOFF_SETTINGS)
def get_record_doc(self, guid):
- """
+ """
Get the metadata associated with a given id
"""
return self.client.get(guid)
-[docs] @backoff.on_exception(backoff.expo, Exception, **DEFAULT_BACKOFF_SETTINGS)
+
+
+[docs]
+ @backoff.on_exception(backoff.expo, Exception, **DEFAULT_BACKOFF_SETTINGS)
def get_with_params(self, params=None):
- """
+ """
Return a document object corresponding to the supplied parameters, such
as ``{'hashes': {'md5': '...'}, 'size': '...', 'metadata': {'file_state': '...'}}``.
@@ -348,9 +385,12 @@ Source code for gen3.index
return rec.to_json()
-[docs] @backoff.on_exception(backoff.expo, Exception, **DEFAULT_BACKOFF_SETTINGS)
+
+
+[docs]
+ @backoff.on_exception(backoff.expo, Exception, **DEFAULT_BACKOFF_SETTINGS)
async def async_get_with_params(self, params, _ssl=None):
- """
+ """
Return a document object corresponding to the supplied parameter
@@ -374,9 +414,12 @@ Source code for gen3.index
return response
-[docs] @backoff.on_exception(backoff.expo, Exception, **DEFAULT_BACKOFF_SETTINGS)
+
+
+[docs]
+ @backoff.on_exception(backoff.expo, Exception, **DEFAULT_BACKOFF_SETTINGS)
def get_latest_version(self, guid, has_version=False):
- """
+ """
Get the metadata of the latest index record version associated
with the given id
@@ -395,9 +438,12 @@ Source code for gen3.index
return rec.to_json()
-[docs] @backoff.on_exception(backoff.expo, Exception, **DEFAULT_BACKOFF_SETTINGS)
+
+
+[docs]
+ @backoff.on_exception(backoff.expo, Exception, **DEFAULT_BACKOFF_SETTINGS)
def get_versions(self, guid):
- """
+ """
Get the metadata of index record version associated with the
given id
@@ -413,9 +459,12 @@ Source code for gen3.index
return [r for _, r in versions.items()]
+
### Post Requests
-[docs] @backoff.on_exception(backoff.expo, Exception, **DEFAULT_BACKOFF_SETTINGS)
+
+[docs]
+ @backoff.on_exception(backoff.expo, Exception, **DEFAULT_BACKOFF_SETTINGS)
def create_record(
self,
hashes,
@@ -430,7 +479,7 @@ Source code for gen3.index
version=None,
authz=None,
):
- """
+ """
Create a new record and add it to the index
@@ -466,7 +515,10 @@ Source code for gen3.index
)
return rec.to_json()
-[docs] @backoff.on_exception(backoff.expo, Exception, **DEFAULT_BACKOFF_SETTINGS)
+
+
+[docs]
+ @backoff.on_exception(backoff.expo, Exception, **DEFAULT_BACKOFF_SETTINGS)
async def async_create_record(
self,
hashes,
@@ -482,7 +534,7 @@ Source code for gen3.index
authz=None,
_ssl=None,
):
- """
+ """
Asynchronous function to create a record in indexd.
Args:
@@ -507,37 +559,49 @@ Source code for gen3.index
urls = []
json = {
- "urls": urls,
"form": "object",
"hashes": hashes,
"size": size,
- "file_name": file_name,
- "metadata": metadata,
- "urls_metadata": urls_metadata,
- "baseid": baseid,
- "acl": acl,
- "authz": authz,
- "version": version,
+ "urls": urls or [],
}
-
if did:
json["did"] = did
+ if file_name:
+ json["file_name"] = file_name
+ if metadata:
+ json["metadata"] = metadata
+ if baseid:
+ json["baseid"] = baseid
+ if acl:
+ json["acl"] = acl
+ if urls_metadata:
+ json["urls_metadata"] = urls_metadata
+ if version:
+ json["version"] = version
+ if authz:
+ json["authz"] = authz
+
+ # aiohttp only allows basic auth with their built in auth, so we
+ # need to manually add JWT auth header
+ headers = {"Authorization": self.client.auth._get_auth_value()}
async with session.post(
f"{self.client.url}/index/",
json=json,
- headers={"content-type": "application/json"},
+ headers=headers,
ssl=_ssl,
- auth=self.client.auth,
) as response:
- raise_for_status_and_print_error(response)
+ assert response.status == 200, await response.json()
response = await response.json()
return response
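
The manual Authorization header above exists because aiohttp's built-in auth helper only supports HTTP basic auth. A standalone sketch of the same pattern; the base URL, token, and payload are placeholders, and the lowercase "bearer" prefix is an assumption mirroring what the SDK's _get_auth_value() helper produces:

    import asyncio
    import aiohttp

    async def create_index_record(base_url: str, jwt: str, payload: dict) -> dict:
        # aiohttp.BasicAuth cannot carry a JWT, so pass it as a raw header
        # instead of using the `auth=` argument.
        headers = {"Authorization": f"bearer {jwt}"}
        async with aiohttp.ClientSession() as session:
            async with session.post(
                f"{base_url}/index/", json=payload, headers=headers
            ) as resp:
                assert resp.status == 200, await resp.json()
                return await resp.json()

    # asyncio.run(create_index_record("https://example-commons.org", "<token>", {"size": 123, "hashes": {"md5": "..."}}))
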
-[docs] @backoff.on_exception(backoff.expo, Exception, **DEFAULT_BACKOFF_SETTINGS)
+
+
+[docs]
+ @backoff.on_exception(backoff.expo, Exception, **DEFAULT_BACKOFF_SETTINGS)
def create_blank(self, uploader, file_name=None):
- """
+ """
Create a blank record
@@ -561,7 +625,10 @@ Source code for gen3.index
return self.get_record(rec["did"])
-[docs] @backoff.on_exception(backoff.expo, Exception, **DEFAULT_BACKOFF_SETTINGS)
+
+
+[docs]
+ @backoff.on_exception(backoff.expo, Exception, **DEFAULT_BACKOFF_SETTINGS)
def create_new_version(
self,
guid,
@@ -576,7 +643,7 @@ Source code for gen3.index
version=None,
authz=None,
):
- """
+ """
Add new version for the document associated to the provided uuid
@@ -636,9 +703,12 @@ Source code for gen3.index
return self.get_record(rec["did"])
return None
-[docs] @backoff.on_exception(backoff.expo, Exception, **DEFAULT_BACKOFF_SETTINGS)
+
+
+[docs]
+ @backoff.on_exception(backoff.expo, Exception, **DEFAULT_BACKOFF_SETTINGS)
def get_records(self, dids):
- """
+ """
Get a list of documents given a list of dids
@@ -662,11 +732,14 @@ Source code for gen3.index
return response.json()
+
### Put Requests
-[docs] @backoff.on_exception(backoff.expo, Exception, **DEFAULT_BACKOFF_SETTINGS)
+
+[docs]
+ @backoff.on_exception(backoff.expo, Exception, **DEFAULT_BACKOFF_SETTINGS)
def update_blank(self, guid, rev, hashes, size, urls=None, authz=None):
- """
+ """
Update only hashes and size for a blank index
@@ -698,7 +771,10 @@ Source code for gen3.index
return self.get_record(rec["did"])
-[docs] @backoff.on_exception(backoff.expo, Exception, **DEFAULT_BACKOFF_SETTINGS)
+
+
+[docs]
+ @backoff.on_exception(backoff.expo, Exception, **DEFAULT_BACKOFF_SETTINGS)
def update_record(
self,
guid,
@@ -710,7 +786,7 @@ Source code for gen3.index
authz=None,
urls_metadata=None,
):
- """
+ """
Update an existing entry in the index
@@ -738,7 +814,10 @@ Source code for gen3.index
rec.patch()
return rec.to_json()
-[docs] @backoff.on_exception(backoff.expo, Exception, **DEFAULT_BACKOFF_SETTINGS)
+
+
+[docs]
+ @backoff.on_exception(backoff.expo, Exception, **DEFAULT_BACKOFF_SETTINGS)
async def async_update_record(
self,
guid,
@@ -749,8 +828,10 @@ Source code for gen3.index
acl=None,
authz=None,
urls_metadata=None,
+ _ssl=None,
+ **kwargs,
):
- """
+ """
Asynchronous function to update a record in indexd.
Args:
@@ -770,30 +851,50 @@ Source code for gen3.index
"authz": authz,
"urls_metadata": urls_metadata,
}
- record = await async_get_record(guid)
+ record = await self.async_get_record(guid)
revision = record.get("rev")
for key, value in updatable_attrs.items():
if value is not None:
record[key] = value
+ del record["created_date"]
+ del record["rev"]
+ del record["updated_date"]
+ del record["version"]
+ del record["uploader"]
+ del record["form"]
+ del record["urls_metadata"]
+ del record["baseid"]
+ del record["size"]
+ del record["hashes"]
+ del record["did"]
+
+ logging.info(f"PUT-ing record: {record}")
+
+ # aiohttp only allows basic auth with their built in auth, so we
+ # need to manually add JWT auth header
+ headers = {"Authorization": self.client.auth._get_auth_value()}
+
async with session.put(
- f"{self.client.url}/index/{guid}/rev={revision}",
+ f"{self.client.url}/index/{guid}?rev={revision}",
json=record,
- headers={"content-type": "application/json"},
+ headers=headers,
ssl=_ssl,
- auth=self.client.auth,
) as response:
- raise_for_status_and_print_error(response)
+ assert response.status == 200, await response.json()
response = await response.json()
return response
+
### Delete Requests
-[docs] @backoff.on_exception(backoff.expo, Exception, **DEFAULT_BACKOFF_SETTINGS)
+
+[docs]
+ @backoff.on_exception(backoff.expo, Exception, **DEFAULT_BACKOFF_SETTINGS)
def delete_record(self, guid):
- """
+ """
Delete an entry from the index
@@ -809,11 +910,14 @@ Source code for gen3.index
rec.delete()
return rec
+
### Query Requests
-[docs] @backoff.on_exception(backoff.expo, Exception, **DEFAULT_BACKOFF_SETTINGS)
+
+[docs]
+ @backoff.on_exception(backoff.expo, Exception, **DEFAULT_BACKOFF_SETTINGS)
def query_urls(self, pattern):
- """
+ """
Query all record URLs for given pattern
@@ -827,9 +931,12 @@ Source code for gen3.index
raise_for_status_and_print_error(response)
return response.json()
-[docs] @backoff.on_exception(backoff.expo, Exception, **DEFAULT_BACKOFF_SETTINGS)
+
+
+[docs]
+ @backoff.on_exception(backoff.expo, Exception, **DEFAULT_BACKOFF_SETTINGS)
async def async_query_urls(self, pattern, _ssl=None):
- """
+ """
Asynchronous function to query urls from indexd.
Args:
@@ -847,11 +954,14 @@ Source code for gen3.index
return response
+
## Mint GUID Requests
-[docs] @backoff.on_exception(backoff.expo, Exception, **DEFAULT_BACKOFF_SETTINGS)
+
+[docs]
+ @backoff.on_exception(backoff.expo, Exception, **DEFAULT_BACKOFF_SETTINGS)
def get_valid_guids(self, count=None):
- """
+ """
Get a list of valid GUIDs without indexing
Args:
count (int): number of GUIDs to request
@@ -866,16 +976,21 @@ Source code for gen3.index
response.raise_for_status()
return response.json().get("guids", [])
-[docs] @backoff.on_exception(backoff.expo, Exception, **DEFAULT_BACKOFF_SETTINGS)
+
+
+[docs]
+ @backoff.on_exception(backoff.expo, Exception, **DEFAULT_BACKOFF_SETTINGS)
def get_guids_prefix(self):
- """
+ """
Get the prefix for GUIDs if there is one
Returns:
str: prefix for this instance
"""
response = self.client._get("/guid/prefix")
response.raise_for_status()
- return response.json().get("prefix")
+ return response.json().get("prefix")
+
+
def _print_func_name(function):
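
A hedged sketch of the Gen3Index read path shown above; the endpoint, credentials file, and hash values are placeholders:

    from gen3.auth import Gen3Auth
    from gen3.index import Gen3Index

    auth = Gen3Auth(refresh_file="credentials.json")  # hypothetical API key file
    index = Gen3Index("https://example-commons.org", auth_provider=auth)

    if index.is_healthy():
        print(index.get_stats())
        # Look a record up by hash and size rather than GUID, using the
        # params format documented for get_with_params above.
        record = index.get_with_params(
            {"hashes": {"md5": "8b9942cf415384b27cadf1f4d2d682e5"}, "size": 123}
        )
        print(record)
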
diff --git a/docs/_build/html/_modules/gen3/jobs.html b/docs/_build/html/_modules/gen3/jobs.html
index 0e0d45791..167c0f35c 100644
--- a/docs/_build/html/_modules/gen3/jobs.html
+++ b/docs/_build/html/_modules/gen3/jobs.html
@@ -1,18 +1,15 @@
gen3.jobs — Gen3 SDK documentation
@@ -62,8 +59,10 @@ Source code for gen3.jobs
logging = get_logger("__name__")
-[docs]class Gen3Jobs:
- """
+
+[docs]
+class Gen3Jobs:
+ """
    A class for interacting with Gen3's Job Dispatching Service(s).
Examples:
@@ -75,7 +74,7 @@ Source code for gen3.jobs
"""
def __init__(self, endpoint=None, auth_provider=None, service_location="job"):
- """
+ """
Initialization for instance of the class to setup basic endpoint info.
Args:
@@ -98,8 +97,10 @@ Source code for gen3.jobs
self.endpoint = endpoint.rstrip("/")
self._auth_provider = auth_provider
-[docs] async def async_run_job_and_wait(self, job_name, job_input, _ssl=None, **kwargs):
- """
+
+[docs]
+ async def async_run_job_and_wait(self, job_name, job_input, _ssl=None, **kwargs):
+ """
        Asynchronous function to create a job, wait for its output, and return it.
        Sleeps with a linearly increasing delay until the job is done, starting at 1 second.
@@ -130,8 +131,11 @@ Source code for gen3.jobs
response = await self.async_get_output(job_create_response.get("uid"))
return response
-[docs] def is_healthy(self):
- """
+
+
+[docs]
+ def is_healthy(self):
+ """
        Return whether the service is healthy or not
Returns:
@@ -148,9 +152,12 @@ Source code for gen3.jobs
return response.text == "Healthy"
-[docs] @backoff.on_exception(backoff.expo, Exception, **DEFAULT_BACKOFF_SETTINGS)
+
+
+[docs]
+ @backoff.on_exception(backoff.expo, Exception, **DEFAULT_BACKOFF_SETTINGS)
def get_version(self):
- """
+ """
Return the version
Returns:
@@ -160,18 +167,24 @@ Source code for gen3.jobs
raise_for_status_and_print_error(response)
return response.json().get("version")
-[docs] @backoff.on_exception(backoff.expo, Exception, **DEFAULT_BACKOFF_SETTINGS)
+
+
+[docs]
+ @backoff.on_exception(backoff.expo, Exception, **DEFAULT_BACKOFF_SETTINGS)
def list_jobs(self):
- """
+ """
List all jobs
"""
response = requests.get(self.endpoint + "/list", auth=self._auth_provider)
raise_for_status_and_print_error(response)
return response.json()
-[docs] @backoff.on_exception(backoff.expo, Exception, **DEFAULT_BACKOFF_SETTINGS)
+
+
+[docs]
+ @backoff.on_exception(backoff.expo, Exception, **DEFAULT_BACKOFF_SETTINGS)
def create_job(self, job_name, job_input):
- """
+ """
Create a job with given name and input
Args:
@@ -188,6 +201,7 @@ Source code for gen3.jobs
raise_for_status_and_print_error(response)
return response.json()
+
@backoff.on_exception(backoff.expo, Exception, **DEFAULT_BACKOFF_SETTINGS)
async def async_create_job(self, job_name, job_input, _ssl=None, **kwargs):
async with aiohttp.ClientSession() as session:
@@ -207,9 +221,11 @@ Source code for gen3.jobs
response = await response.json(content_type=None)
return response
-[docs] @backoff.on_exception(backoff.expo, Exception, **DEFAULT_BACKOFF_SETTINGS)
+
+[docs]
+ @backoff.on_exception(backoff.expo, Exception, **DEFAULT_BACKOFF_SETTINGS)
def get_status(self, job_id):
- """
+ """
Get the status of a previously created job
"""
response = requests.get(
@@ -218,6 +234,7 @@ Source code for gen3.jobs
raise_for_status_and_print_error(response)
return response.json()
+
@backoff.on_exception(backoff.expo, Exception, **DEFAULT_BACKOFF_SETTINGS)
async def async_get_status(self, job_id, _ssl=None, **kwargs):
async with aiohttp.ClientSession() as session:
@@ -235,9 +252,11 @@ Source code for gen3.jobs
response = await response.json(content_type=None)
return response
-[docs] @backoff.on_exception(backoff.expo, Exception, **DEFAULT_BACKOFF_SETTINGS)
+
+[docs]
+ @backoff.on_exception(backoff.expo, Exception, **DEFAULT_BACKOFF_SETTINGS)
def get_output(self, job_id):
- """
+ """
Get the output of a previously completed job
"""
response = requests.get(
@@ -246,6 +265,7 @@ Source code for gen3.jobs
raise_for_status_and_print_error(response)
return response.json()
+
@backoff.on_exception(backoff.expo, Exception, **DEFAULT_BACKOFF_SETTINGS)
async def async_get_output(self, job_id, _ssl=None, **kwargs):
async with aiohttp.ClientSession() as session:
@@ -262,6 +282,7 @@ Source code for gen3.jobs
raise_for_status_and_print_error(response)
response = await response.json(content_type=None)
return response
+
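
An end-to-end sketch for the job API above; the job name and input are illustrative and depend on which jobs the commons has deployed:

    import asyncio

    from gen3.auth import Gen3Auth
    from gen3.jobs import Gen3Jobs

    auth = Gen3Auth(refresh_file="credentials.json")  # hypothetical API key file
    jobs = Gen3Jobs("https://example-commons.org", auth_provider=auth)

    # Creates the job, then polls with a linearly increasing sleep until done.
    output = asyncio.run(
        jobs.async_run_job_and_wait(
            job_name="example-job",              # hypothetical job name
            job_input={"input": "placeholder"},  # hypothetical job input
        )
    )
    print(output)
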
diff --git a/docs/_build/html/_modules/gen3/metadata.html b/docs/_build/html/_modules/gen3/metadata.html
index d78515824..0de1a5985 100644
--- a/docs/_build/html/_modules/gen3/metadata.html
+++ b/docs/_build/html/_modules/gen3/metadata.html
@@ -1,18 +1,15 @@
gen3.metadata — Gen3 SDK documentation
@@ -89,8 +86,10 @@ Source code for gen3.metadata
}
-[docs]class Gen3Metadata:
- """
+
+[docs]
+class Gen3Metadata:
+ """
A class for interacting with the Gen3 Metadata services.
Examples:
@@ -112,7 +111,7 @@ Source code for gen3.metadata
service_location="mds",
admin_endpoint_suffix="-admin",
):
- """
+ """
Initialization for instance of the class to setup basic endpoint info.
Args:
@@ -142,8 +141,10 @@ Source code for gen3.metadata
self.admin_endpoint = endpoint.rstrip("/") + admin_endpoint_suffix
self._auth_provider = auth_provider
-[docs] def is_healthy(self):
- """
+
+[docs]
+ def is_healthy(self):
+ """
        Return whether the service is healthy or not
Returns:
@@ -160,9 +161,12 @@ Source code for gen3.metadata
return response.json().get("status") == "OK"
-[docs] @backoff.on_exception(backoff.expo, Exception, **DEFAULT_BACKOFF_SETTINGS)
+
+
+[docs]
+ @backoff.on_exception(backoff.expo, Exception, **DEFAULT_BACKOFF_SETTINGS)
def get_version(self):
- """
+ """
Return the version
Returns:
@@ -172,9 +176,12 @@ Source code for gen3.metadata
response.raise_for_status()
return response.text
-[docs] @backoff.on_exception(backoff.expo, Exception, **DEFAULT_BACKOFF_SETTINGS)
+
+
+[docs]
+ @backoff.on_exception(backoff.expo, Exception, **DEFAULT_BACKOFF_SETTINGS)
def get_index_key_paths(self):
- """
+ """
List all the metadata key paths indexed in the database.
Returns:
@@ -186,9 +193,12 @@ Source code for gen3.metadata
response.raise_for_status()
return response.json()
-[docs] @backoff.on_exception(backoff.expo, Exception, **DEFAULT_BACKOFF_SETTINGS)
+
+
+[docs]
+ @backoff.on_exception(backoff.expo, Exception, **DEFAULT_BACKOFF_SETTINGS)
def create_index_key_path(self, path):
- """
+ """
Create a metadata key path indexed in the database.
Args:
@@ -200,9 +210,12 @@ Source code for gen3.metadata
response.raise_for_status()
return response.json()
-[docs] @backoff.on_exception(backoff.expo, Exception, **DEFAULT_BACKOFF_SETTINGS)
+
+
+[docs]
+ @backoff.on_exception(backoff.expo, Exception, **DEFAULT_BACKOFF_SETTINGS)
def delete_index_key_path(self, path):
- """
+ """
        Delete a metadata key path indexed in the database.
Args:
@@ -214,7 +227,10 @@ Source code for gen3.metadata
response.raise_for_status()
return response
-[docs] @backoff.on_exception(backoff.expo, Exception, **DEFAULT_BACKOFF_SETTINGS)
+
+
+[docs]
+ @backoff.on_exception(backoff.expo, Exception, **DEFAULT_BACKOFF_SETTINGS)
def query(
self,
query,
@@ -224,7 +240,7 @@ Source code for gen3.metadata
use_agg_mds=False,
**kwargs,
):
- """
+ """
Query the metadata given a query.
Query format is based off the logic used in the service:
@@ -281,9 +297,12 @@ Source code for gen3.metadata
return response.json()
-[docs] @backoff.on_exception(backoff.expo, Exception, **BACKOFF_NO_LOG_IF_NOT_RETRIED)
+
+
+[docs]
+ @backoff.on_exception(backoff.expo, Exception, **BACKOFF_NO_LOG_IF_NOT_RETRIED)
async def async_get(self, guid, _ssl=None, **kwargs):
- """
+ """
Asynchronous function to get metadata
Args:
@@ -305,9 +324,12 @@ Source code for gen3.metadata
return response
-[docs] @backoff.on_exception(backoff.expo, Exception, **BACKOFF_NO_LOG_IF_NOT_RETRIED)
+
+
+[docs]
+ @backoff.on_exception(backoff.expo, Exception, **BACKOFF_NO_LOG_IF_NOT_RETRIED)
def get(self, guid, **kwargs):
- """
+ """
Get the metadata associated with the guid
Args:
guid (str): guid to use
@@ -323,9 +345,12 @@ Source code for gen3.metadata
return response.json()
-[docs] @backoff.on_exception(backoff.expo, Exception, **DEFAULT_BACKOFF_SETTINGS)
+
+
+[docs]
+ @backoff.on_exception(backoff.expo, Exception, **DEFAULT_BACKOFF_SETTINGS)
def batch_create(self, metadata_list, overwrite=True, **kwargs):
- """
+ """
Create the list of metadata associated with the list of guids
Args:
@@ -356,9 +381,12 @@ Source code for gen3.metadata
return response.json()
-[docs] @backoff.on_exception(backoff.expo, Exception, **DEFAULT_BACKOFF_SETTINGS)
- def create(self, guid, metadata, overwrite=False, **kwargs):
- """
+
+
+[docs]
+ @backoff.on_exception(backoff.expo, Exception, **BACKOFF_NO_LOG_IF_NOT_RETRIED)
+ def create(self, guid, metadata, aliases=None, overwrite=False, **kwargs):
+ """
Create the metadata associated with the guid
Args:
@@ -367,6 +395,8 @@ Source code for gen3.metadata
attached to the provided GUID as metadata
overwrite (bool, optional): whether or not to overwrite existing data
"""
+ aliases = aliases or []
+
url = self.admin_endpoint + f"/metadata/{guid}"
url_with_params = append_query_params(url, overwrite=overwrite, **kwargs)
@@ -377,9 +407,23 @@ Source code for gen3.metadata
)
response.raise_for_status()
+ if aliases:
+ try:
+ self.create_aliases(guid=guid, aliases=aliases, merge=overwrite)
+ except Exception:
+ logging.error(
+ "Error while attempting to create aliases: "
+ f"'{aliases}' to GUID: '{guid}' with merge={overwrite}. "
+ "GUID metadata record was created successfully and "
+ "will NOT be deleted."
+ )
+
return response.json()
-[docs] @backoff.on_exception(backoff.expo, Exception, **DEFAULT_BACKOFF_SETTINGS)
+
+
+[docs]
+ @backoff.on_exception(backoff.expo, Exception, **BACKOFF_NO_LOG_IF_NOT_RETRIED)
async def async_create(
self,
guid,
@@ -389,7 +433,7 @@ Source code for gen3.metadata
_ssl=None,
**kwargs,
):
- """
+ """
Asynchronous function to create metadata
Args:
@@ -400,7 +444,7 @@ Source code for gen3.metadata
_ssl (None, optional): whether or not to use ssl
"""
aliases = aliases or []
- # todo handle aliases
+
async with aiohttp.ClientSession() as session:
url = self.admin_endpoint + f"/metadata/{guid}"
url_with_params = append_query_params(url, overwrite=overwrite, **kwargs)
@@ -417,11 +461,28 @@ Source code for gen3.metadata
response.raise_for_status()
response = await response.json()
+ if aliases:
+ logging.info(f"creating aliases: {aliases}")
+ try:
+ await self.async_create_aliases(
+ guid=guid, aliases=aliases, _ssl=_ssl
+ )
+ except Exception:
+ logging.error(
+ "Error while attempting to create aliases: "
+ f"'{aliases}' to GUID: '{guid}'. "
+ "GUID metadata record was created successfully and "
+ "will NOT be deleted."
+ )
+
return response
-[docs] @backoff.on_exception(backoff.expo, Exception, **DEFAULT_BACKOFF_SETTINGS)
- def update(self, guid, metadata, **kwargs):
- """
+
+
+[docs]
+ @backoff.on_exception(backoff.expo, Exception, **DEFAULT_BACKOFF_SETTINGS)
+ def update(self, guid, metadata, aliases=None, merge=False, **kwargs):
+ """
Update the metadata associated with the guid
Args:
@@ -429,6 +490,8 @@ Source code for gen3.metadata
metadata (Dict): dictionary representing what will end up a JSON blob
attached to the provided GUID as metadata
"""
+ aliases = aliases or []
+
url = self.admin_endpoint + f"/metadata/{guid}"
url_with_params = append_query_params(url, **kwargs)
@@ -439,27 +502,44 @@ Source code for gen3.metadata
)
response.raise_for_status()
+ if aliases:
+ try:
+ self.update_aliases(guid=guid, aliases=aliases, merge=merge)
+ except Exception:
+ logging.error(
+ "Error while attempting to update aliases: "
+ f"'{aliases}' to GUID: '{guid}'. "
+                    "GUID metadata record was updated successfully and "
+ "will NOT be deleted."
+ )
+
return response.json()
-[docs] @backoff.on_exception(backoff.expo, Exception, **DEFAULT_BACKOFF_SETTINGS)
+
+
+[docs]
+ @backoff.on_exception(backoff.expo, Exception, **DEFAULT_BACKOFF_SETTINGS)
async def async_update(
self, guid, metadata, aliases=None, merge=False, _ssl=None, **kwargs
):
- """
+ """
Asynchronous function to update metadata
Args:
guid (str): guid to use
metadata (Dict): dictionary representing what will end up a JSON blob
attached to the provided GUID as metadata
+ aliases (list[str], optional): List of aliases to update the GUID with
+ merge (bool, optional): Whether or not to merge metadata AND aliases
+ with existing values
_ssl (None, optional): whether or not to use ssl
+            **kwargs: additional query params
"""
- # TODO handle aliases
+ aliases = aliases or []
+
async with aiohttp.ClientSession() as session:
url = self.admin_endpoint + f"/metadata/{guid}"
- if merge:
- url += "?merge=True"
- url_with_params = append_query_params(url, **kwargs)
+ url_with_params = append_query_params(url, merge=merge, **kwargs)
# aiohttp only allows basic auth with their built in auth, so we
# need to manually add JWT auth header
@@ -471,11 +551,27 @@ Source code for gen3.metadata
response.raise_for_status()
response = await response.json()
+ if aliases:
+ try:
+ await self.async_update_aliases(
+ guid=guid, aliases=aliases, merge=merge, _ssl=_ssl
+ )
+ except Exception:
+ logging.error(
+ "Error while attempting to update aliases: "
+ f"'{aliases}' to GUID: '{guid}' with merge={merge}. "
+                    "GUID metadata record was updated successfully and "
+ "will NOT be deleted."
+ )
+
return response
-[docs] @backoff.on_exception(backoff.expo, Exception, **DEFAULT_BACKOFF_SETTINGS)
+
+
+[docs]
+ @backoff.on_exception(backoff.expo, Exception, **DEFAULT_BACKOFF_SETTINGS)
def delete(self, guid, **kwargs):
- """
+ """
Delete the metadata associated with the guid
Args:
@@ -490,10 +586,366 @@ Source code for gen3.metadata
return response.json()
+
+ #
+ # Alias Support
+ #
+
+
+[docs]
+ @backoff.on_exception(backoff.expo, Exception, **BACKOFF_NO_LOG_IF_NOT_RETRIED)
+ def get_aliases(self, guid, **kwargs):
+ """
+ Get Aliases for the given guid
+
+ Args:
+            guid (str): Globally unique ID for the metadata blob
+ **kwargs: additional query params
+
+ Returns:
+ requests.Response: response from the request to get aliases
+ """
+ url = self.endpoint + f"/metadata/{guid}/aliases"
+ url_with_params = append_query_params(url, **kwargs)
+
+ logging.debug(f"hitting: {url_with_params}")
+ response = requests.get(url_with_params, auth=self._auth_provider)
+ response.raise_for_status()
+
+ return response.json()
+
+
+
+[docs]
+ @backoff.on_exception(backoff.expo, Exception, **BACKOFF_NO_LOG_IF_NOT_RETRIED)
+ async def async_get_aliases(self, guid, _ssl=None, **kwargs):
+ """
+        Asynchronously get Aliases for the given guid
+
+ Args:
+            guid (str): Globally unique ID for the metadata blob
+ _ssl (None, optional): whether or not to use ssl
+ **kwargs: additional query params
+
+ Returns:
+ requests.Response: response from the request to get aliases
+ """
+ async with aiohttp.ClientSession() as session:
+ url = self.endpoint + f"/metadata/{guid}/aliases"
+ url_with_params = append_query_params(url, **kwargs)
+
+ # aiohttp only allows basic auth with their built in auth, so we
+ # need to manually add JWT auth header
+ headers = {"Authorization": self._auth_provider._get_auth_value()}
+
+ logging.debug(f"hitting: {url_with_params}")
+ async with session.get(
+ url_with_params, headers=headers, ssl=_ssl
+ ) as response:
+ response.raise_for_status()
+
+ return await response.json()
+
+
+ @backoff.on_exception(backoff.expo, Exception, **BACKOFF_NO_LOG_IF_NOT_RETRIED)
+ def delete_alias(self, guid, alias, **kwargs):
+ """
+        Delete a single Alias for the given guid
+
+ Args:
+            guid (str): Globally unique ID for the metadata blob
+            alias (str): alternative identifier (alias) to delete
+            **kwargs: additional query params
+
+ Returns:
+ requests.Response: response from the request to delete aliases
+ """
+ url = self.admin_endpoint + f"/metadata/{guid}/aliases/{alias}"
+ url_with_params = append_query_params(url, **kwargs)
+
+ logging.debug(f"hitting: {url_with_params}")
+ response = requests.delete(url_with_params, auth=self._auth_provider)
+ response.raise_for_status()
+
+ return response.json()
+
+ @backoff.on_exception(backoff.expo, Exception, **BACKOFF_NO_LOG_IF_NOT_RETRIED)
+ async def async_delete_alias(self, guid, alias, _ssl=None, **kwargs):
+ """
+        Asynchronously delete a single Alias for the given guid
+
+ Args:
+            guid (str): Globally unique ID for the metadata blob
+            alias (str): alternative identifier (alias) to delete
+            _ssl (None, optional): whether or not to use ssl
+            **kwargs: additional query params
+
+ Returns:
+ requests.Response: response from the request to delete aliases
+ """
+ async with aiohttp.ClientSession() as session:
+ url = self.admin_endpoint + f"/metadata/{guid}/aliases/{alias}"
+ url_with_params = append_query_params(url, **kwargs)
+
+ # aiohttp only allows basic auth with their built in auth, so we
+ # need to manually add JWT auth header
+ headers = {"Authorization": self._auth_provider._get_auth_value()}
+
+ logging.debug(f"hitting: {url_with_params}")
+ async with session.delete(
+ url_with_params, headers=headers, ssl=_ssl
+ ) as response:
+ response.raise_for_status()
+
+ return await response.json()
+
+
+[docs]
+ @backoff.on_exception(backoff.expo, Exception, **BACKOFF_NO_LOG_IF_NOT_RETRIED)
+ def create_aliases(self, guid, aliases, **kwargs):
+ """
+ Create Aliases for the given guid
+
+ Args:
+            guid (str): Globally unique ID for the metadata blob
+ aliases (list[str]): Aliases to set for the guid
+ **kwargs: additional query params
+
+ Returns:
+ requests.Response: response from the request to create aliases
+ """
+ url = self.admin_endpoint + f"/metadata/{guid}/aliases"
+ url_with_params = append_query_params(url, **kwargs)
+
+ data = {"aliases": aliases}
+
+ logging.debug(f"hitting: {url_with_params}")
+ logging.debug(f"data: {data}")
+ response = requests.post(url_with_params, json=data, auth=self._auth_provider)
+ response.raise_for_status()
+
+ return response.json()
+
+
+
+[docs]
+ @backoff.on_exception(backoff.expo, Exception, **BACKOFF_NO_LOG_IF_NOT_RETRIED)
+ async def async_create_aliases(self, guid, aliases, _ssl=None, **kwargs):
+ """
+        Asynchronously create Aliases for the given guid
+
+ Args:
+            guid (str): Globally unique ID for the metadata blob
+ aliases (list[str]): Aliases to set for the guid
+ _ssl (None, optional): whether or not to use ssl
+ **kwargs: additional query params
+
+ Returns:
+ requests.Response: response from the request to create aliases
+ """
+ async with aiohttp.ClientSession() as session:
+ url = self.admin_endpoint + f"/metadata/{guid}/aliases"
+ url_with_params = append_query_params(url, **kwargs)
+
+ # aiohttp only allows basic auth with their built in auth, so we
+ # need to manually add JWT auth header
+ headers = {"Authorization": self._auth_provider._get_auth_value()}
+
+ data = {"aliases": aliases}
+
+ logging.debug(f"hitting: {url_with_params}")
+ logging.debug(f"data: {data}")
+ async with session.post(
+ url_with_params, json=data, headers=headers, ssl=_ssl
+ ) as response:
+ response.raise_for_status()
+ return await response.json()
+
+
+
+[docs]
+ @backoff.on_exception(backoff.expo, Exception, **BACKOFF_NO_LOG_IF_NOT_RETRIED)
+ def update_aliases(self, guid, aliases, merge=False, **kwargs):
+ """
+ Update Aliases for the given guid
+
+ Args:
+            guid (str): Globally unique ID for the metadata blob
+ aliases (list[str]): Aliases to set for the guid
+            merge (bool, optional): Whether or not to merge aliases with existing values
+ **kwargs: additional query params
+
+ Returns:
+ requests.Response: response from the request to update aliases
+ """
+ url = self.admin_endpoint + f"/metadata/{guid}/aliases"
+ url_with_params = append_query_params(url, merge=merge, **kwargs)
+
+ data = {"aliases": aliases}
+
+ logging.debug(f"hitting: {url_with_params}")
+ logging.debug(f"data: {data}")
+ response = requests.put(url_with_params, json=data, auth=self._auth_provider)
+ response.raise_for_status()
+
+ return response.json()
+
+
+
+[docs]
+ @backoff.on_exception(backoff.expo, Exception, **BACKOFF_NO_LOG_IF_NOT_RETRIED)
+ async def async_update_aliases(
+ self, guid, aliases, merge=False, _ssl=None, **kwargs
+ ):
+ """
+        Asynchronously update Aliases for the given guid
+
+ Args:
+            guid (str): Globally unique ID for the metadata blob
+ aliases (list[str]): Aliases to set for the guid
+            merge (bool, optional): Whether or not to merge aliases with existing values
+ _ssl (None, optional): whether or not to use ssl
+ **kwargs: additional query params
+
+ Returns:
+ requests.Response: response from the request to update aliases
+ """
+ async with aiohttp.ClientSession() as session:
+ url = self.admin_endpoint + f"/metadata/{guid}/aliases"
+ url_with_params = append_query_params(url, merge=merge, **kwargs)
+
+ # aiohttp only allows basic auth with their built in auth, so we
+ # need to manually add JWT auth header
+ headers = {"Authorization": self._auth_provider._get_auth_value()}
+
+ data = {"aliases": aliases}
+
+ logging.debug(f"hitting: {url_with_params}")
+ logging.debug(f"data: {data}")
+ async with session.put(
+ url_with_params, json=data, headers=headers, ssl=_ssl
+ ) as response:
+ response.raise_for_status()
+
+ return await response.json()
+
+
+
+[docs]
+ @backoff.on_exception(backoff.expo, Exception, **BACKOFF_NO_LOG_IF_NOT_RETRIED)
+ def delete_aliases(self, guid, **kwargs):
+ """
+ Delete all Aliases for the given guid
+
+ Args:
+            guid (str): Globally unique ID for the metadata blob
+ **kwargs: additional query params
+
+ Returns:
+ requests.Response: response from the request to delete aliases
+ """
+ url = self.admin_endpoint + f"/metadata/{guid}/aliases"
+ url_with_params = append_query_params(url, **kwargs)
+
+ logging.debug(f"hitting: {url_with_params}")
+ response = requests.delete(url_with_params, auth=self._auth_provider)
+ response.raise_for_status()
+
+ return response.text
+
+
+
+[docs]
+ @backoff.on_exception(backoff.expo, Exception, **BACKOFF_NO_LOG_IF_NOT_RETRIED)
+ async def async_delete_aliases(self, guid, _ssl=None, **kwargs):
+ """
+        Asynchronously delete all Aliases for the given guid
+
+ Args:
+            guid (str): Globally unique ID for the metadata blob
+ _ssl (None, optional): whether or not to use ssl
+ **kwargs: additional query params
+
+ Returns:
+ requests.Response: response from the request to delete aliases
+ """
+ async with aiohttp.ClientSession() as session:
+ url = self.admin_endpoint + f"/metadata/{guid}/aliases"
+ url_with_params = append_query_params(url, **kwargs)
+
+ # aiohttp only allows basic auth with their built in auth, so we
+ # need to manually add JWT auth header
+ headers = {"Authorization": self._auth_provider._get_auth_value()}
+
+ logging.debug(f"hitting: {url_with_params}")
+ async with session.delete(
+ url_with_params, headers=headers, ssl=_ssl
+ ) as response:
+ response.raise_for_status()
+
+            return await response.text()
+
+
+
+[docs]
+ @backoff.on_exception(backoff.expo, Exception, **BACKOFF_NO_LOG_IF_NOT_RETRIED)
+ def delete_alias(self, guid, alias, **kwargs):
+ """
+        Delete a single Alias for the given guid
+
+ Args:
+            guid (str): Globally unique ID for the metadata blob
+ alias (str): alternative identifier (alias) to delete
+ **kwargs: additional query params
+
+ Returns:
+ requests.Response: response from the request to delete aliases
+ """
+ url = self.admin_endpoint + f"/metadata/{guid}/aliases/{alias}"
+ url_with_params = append_query_params(url, **kwargs)
+
+ logging.debug(f"hitting: {url_with_params}")
+ response = requests.delete(url_with_params, auth=self._auth_provider)
+ response.raise_for_status()
+
+ return response.text
+
+
+
+[docs]
+ @backoff.on_exception(backoff.expo, Exception, **BACKOFF_NO_LOG_IF_NOT_RETRIED)
+ async def async_delete_alias(self, guid, alias, _ssl=None, **kwargs):
+ """
+        Asynchronously delete a single Alias for the given guid
+
+ Args:
+ guid (str): Globally unique ID for the metadata blob
+ alias (str): alternative identifier (alias) to delete
+ _ssl (None, optional): whether or not to use ssl
+ **kwargs: additional query params
+
+ Returns:
+ requests.Response: response from the request to delete aliases
+ """
+ async with aiohttp.ClientSession() as session:
+ url = self.admin_endpoint + f"/metadata/{guid}/aliases/{alias}"
+ url_with_params = append_query_params(url, **kwargs)
+
+ # aiohttp only allows basic auth with their built in auth, so we
+ # need to manually add JWT auth header
+ headers = {"Authorization": self._auth_provider._get_auth_value()}
+
+ logging.debug(f"hitting: {url_with_params}")
+ async with session.delete(
+ url_with_params, headers=headers, ssl=_ssl
+ ) as response:
+ response.raise_for_status()
+
+            return await response.text()
+
+
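
A sketch tying the alias helpers above together; the GUID and alias strings are placeholders:

    from gen3.auth import Gen3Auth
    from gen3.metadata import Gen3Metadata

    auth = Gen3Auth(refresh_file="credentials.json")  # hypothetical API key file
    mds = Gen3Metadata(auth_provider=auth)

    guid = "dg.1234/11111111-2222-3333-4444-555555555555"  # hypothetical GUID

    mds.create_aliases(guid, ["example-alias-1"])
    # merge=True keeps aliases already on the record; merge=False replaces them.
    mds.update_aliases(guid, ["example-alias-1", "example-alias-2"], merge=True)
    print(mds.get_aliases(guid))
    mds.delete_alias(guid, "example-alias-2")
    mds.delete_aliases(guid)  # remove whatever is left
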
def _prepare_metadata(
self, metadata, indexd_doc, force_metadata_columns_even_if_empty
):
- """
+ """
Validate and generate the provided metadata for submission to the metadata
service.
@@ -509,7 +961,7 @@ Source code for gen3.metadata
"""
def _extract_non_indexd_metadata(metadata):
- """
+ """
Get the "additional metadata": metadata that was provided but is
not stored in indexd, so should be stored in the metadata service.
"""
@@ -575,7 +1027,7 @@ Source code for gen3.metadata
def _get_package_metadata(
self, submitted_metadata, file_name, file_size, hashes, urls, contents
):
- """
+ """
The MDS Objects API currently expects files that have not been
        uploaded yet. For files we only need to index, not upload, create
object records manually by generating the expected object fields.
@@ -617,6 +1069,7 @@ Source code for gen3.metadata
"_upload_status": "uploaded",
}
return metadata
+
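
A sketch of creating a metadata blob with aliases attached in one call, per the create() method above; the error handling shown means a failed alias creation leaves the metadata record in place. The metadata dict, alias, and GUID are illustrative:

    from gen3.auth import Gen3Auth
    from gen3.metadata import Gen3Metadata

    auth = Gen3Auth(refresh_file="credentials.json")  # hypothetical API key file
    mds = Gen3Metadata(auth_provider=auth)

    guid = "dg.1234/aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee"  # hypothetical GUID

    mds.create(
        guid,
        {"study_name": "Example Study", "year": 2023},
        aliases=["example-study"],
        overwrite=True,
    )

    # Later, push updated fields onto the same record.
    mds.update(guid, {"year": 2024})
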
diff --git a/docs/_build/html/_modules/gen3/object.html b/docs/_build/html/_modules/gen3/object.html
index 3a354f674..d2bc41529 100644
--- a/docs/_build/html/_modules/gen3/object.html
+++ b/docs/_build/html/_modules/gen3/object.html
@@ -1,18 +1,15 @@
gen3.object — Gen3 SDK documentation
@@ -40,8 +37,10 @@ Source code for gen3.object
pass
-[docs]class Gen3Object:
- """For interacting with Gen3 object level features.
+
+[docs]
+class Gen3Object:
+ """For interacting with Gen3 object level features.
A class for interacting with the Gen3 object services.
Currently allows creating and deleting of an object from the Gen3 System.
@@ -79,8 +78,10 @@ Source code for gen3.object
data = response.json()
return data["guid"], data["upload_url"]
-[docs] def delete_object(self, guid, delete_file_locations=False):
- """
+
+[docs]
+ def delete_object(self, guid, delete_file_locations=False):
+ """
Delete the object from indexd, metadata service and optionally all storage locations
Args:
@@ -98,7 +99,9 @@ Source code for gen3.object
+ delete_param
)
response = requests.delete(url, auth=self._auth_provider)
- raise_for_status_and_print_error(response)
+ raise_for_status_and_print_error(response)
+
+
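
A short sketch for the object deletion above; the GUID is a placeholder:

    from gen3.auth import Gen3Auth
    from gen3.object import Gen3Object

    auth = Gen3Auth(refresh_file="credentials.json")  # hypothetical API key file
    obj = Gen3Object(auth_provider=auth)

    # Removes the record from indexd and the metadata service; set
    # delete_file_locations=True to also delete the stored file copies.
    obj.delete_object(
        "dg.1234/11111111-2222-3333-4444-555555555555",  # hypothetical GUID
        delete_file_locations=False,
    )
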
diff --git a/docs/_build/html/_modules/gen3/query.html b/docs/_build/html/_modules/gen3/query.html
index 59180c1fe..9079f45d7 100644
--- a/docs/_build/html/_modules/gen3/query.html
+++ b/docs/_build/html/_modules/gen3/query.html
@@ -1,18 +1,15 @@
gen3.query — Gen3 SDK documentation
@@ -37,8 +34,10 @@ Source code for gen3.query
from gen3.utils import raise_for_status_and_print_error
-[docs]class Gen3Query:
- """
+
+[docs]
+class Gen3Query:
+ """
Query ElasticSearch data from a Gen3 system.
Args:
@@ -55,7 +54,9 @@ Source code for gen3.query
def __init__(self, auth_provider):
self._auth_provider = auth_provider
-[docs] def query(
+
+[docs]
+ def query(
self,
data_type,
fields,
@@ -67,7 +68,7 @@ Source code for gen3.query
accessibility=None,
verbose=True,
):
- """
+ """
Execute a query against a Data Commons.
Args:
@@ -129,22 +130,25 @@ Source code for gen3.query
sorts = [f'{{{field}: "{val}"}}' for field, val in sort_object.items()]
sort_string = f'[{", ".join(sorts)}]'
- query_string = f"""query($filter: JSON) {{
+ query_string = f"""query($filter: JSON) {{
{data_type}(
first: {first},
offset: {offset},
sort: {sort_string},
accessibility: {accessibility},
filter: $filter
- ) {{
- {" ".join(fields)}
- }}
+ ) {{
+ {" ".join(fields)}
+ }}
}}"""
variables = {"filter": filter_object}
return self.graphql_query(query_string=query_string, variables=variables)
-[docs] def graphql_query(self, query_string, variables=None):
- """
+
+
+[docs]
+ def graphql_query(self, query_string, variables=None):
+ """
Execute a GraphQL query against a Data Commons.
Args:
@@ -177,7 +181,10 @@ Source code for gen3.query
print(f"Did not receive JSON: {response.text}")
raise
-[docs] def raw_data_download(
+
+
+[docs]
+ def raw_data_download(
self,
data_type,
fields,
@@ -187,7 +194,7 @@ Source code for gen3.query
first=None,
offset=None,
):
- """
+ """
Execute a raw data download against a Data Commons.
Args:
@@ -248,7 +255,9 @@ Source code for gen3.query
if first:
data = data[:first]
- return data
+ return data
+
+
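
A sketch of both query paths above. The index name ("subject"), the field names, and the guppy-style filter are assumptions about a particular commons' ElasticSearch configuration:

    from gen3.auth import Gen3Auth
    from gen3.query import Gen3Query

    auth = Gen3Auth(refresh_file="credentials.json")  # hypothetical API key file
    query = Gen3Query(auth)

    # High-level helper: builds the $filter-based GraphQL string shown above.
    rows = query.query(
        data_type="subject",                              # hypothetical index
        fields=["subject_id", "project_id"],
        filter_object={"=": {"project_id": "DEV-test"}},  # illustrative filter
        first=10,
    )

    # The equivalent raw GraphQL, with the filter passed as a variable.
    raw = query.graphql_query(
        query_string="""query($filter: JSON) {
            subject(first: 10, filter: $filter) {
                subject_id project_id
            }
        }""",
        variables={"filter": {"=": {"project_id": "DEV-test"}}},
    )
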
diff --git a/docs/_build/html/_modules/gen3/submission.html b/docs/_build/html/_modules/gen3/submission.html
index 0e3743175..fcebb439a 100644
--- a/docs/_build/html/_modules/gen3/submission.html
+++ b/docs/_build/html/_modules/gen3/submission.html
@@ -1,18 +1,15 @@
gen3.submission — Gen3 SDK documentation
@@ -56,8 +53,10 @@ Source code for gen3.submission
pass
-[docs]class Gen3Submission:
- """Submit/Export/Query data from a Gen3 Submission system.
+
+[docs]
+class Gen3Submission:
+ """Submit/Export/Query data from a Gen3 Submission system.
A class for interacting with the Gen3 submission services.
Supports submitting and exporting from Sheepdog.
@@ -81,22 +80,27 @@ Source code for gen3.submission
self._endpoint = self._auth_provider.endpoint
def __export_file(self, filename, output):
- """Writes an API response to a file."""
+ """Writes an API response to a file."""
with open(filename, "w") as outfile:
outfile.write(output)
print("\nOutput written to file: " + filename)
### Program functions
-[docs] def get_programs(self):
- """List registered programs"""
+
+[docs]
+ def get_programs(self):
+ """List registered programs"""
api_url = f"{self._endpoint}/api/v0/submission/"
output = requests.get(api_url, auth=self._auth_provider)
raise_for_status_and_print_error(output)
return output.json()
-[docs] def create_program(self, json):
- """Create a program.
+
+
+[docs]
+ def create_program(self, json):
+ """Create a program.
Args:
json (object): The json of the program to create
@@ -110,8 +114,11 @@ Source code for gen3.submission
raise_for_status_and_print_error(output)
return output.json()
-[docs] def delete_program(self, program):
- """Delete a program.
+
+
+[docs]
+ def delete_program(self, program):
+ """Delete a program.
This deletes an empty program from the commons.
@@ -129,10 +136,13 @@ Source code for gen3.submission
raise_for_status_and_print_error(output)
return output
+
### Project functions
-[docs] def get_projects(self, program):
- """List registered projects for a given program
+
+[docs]
+ def get_projects(self, program):
+ """List registered projects for a given program
Args:
program: the name of the program you want the projects from
@@ -148,8 +158,11 @@ Source code for gen3.submission
raise_for_status_and_print_error(output)
return output.json()
-[docs] def create_project(self, program, json):
- """Create a project.
+
+
+[docs]
+ def create_project(self, program, json):
+ """Create a project.
Args:
program (str): The program to create a project on
json (object): The json of the project to create
@@ -164,8 +177,11 @@ Source code for gen3.submission
raise_for_status_and_print_error(output)
return output.json()
-[docs] def delete_project(self, program, project):
- """Delete a project.
+
+
+[docs]
+ def delete_project(self, program, project):
+ """Delete a project.
This deletes an empty project from the commons.
@@ -184,8 +200,11 @@ Source code for gen3.submission
raise_for_status_and_print_error(output)
return output
-[docs] def get_project_dictionary(self, program, project):
- """Get dictionary schema for a given project
+
+
+[docs]
+ def get_project_dictionary(self, program, project):
+ """Get dictionary schema for a given project
Args:
program: the name of the program the project is from
@@ -201,8 +220,11 @@ Source code for gen3.submission
raise_for_status_and_print_error(output)
return output.json()
-[docs] def open_project(self, program, project):
- """Mark a project ``open``. Opening a project means uploads, deletions, etc. are allowed.
+
+
+[docs]
+ def open_project(self, program, project):
+ """Mark a project ``open``. Opening a project means uploads, deletions, etc. are allowed.
Args:
program: the name of the program the project is from
@@ -218,10 +240,13 @@ Source code for gen3.submission
raise_for_status_and_print_error(output)
return output.json()
+
### Record functions
-[docs] def submit_record(self, program, project, json):
- """Submit record(s) to a project as json.
+
+[docs]
+ def submit_record(self, program, project, json):
+ """Submit record(s) to a project as json.
Args:
program (str): The program to submit to.
@@ -241,8 +266,11 @@ Source code for gen3.submission
output.raise_for_status()
return output.json()
-[docs] def delete_record(self, program, project, uuid):
- """
+
+
+[docs]
+ def delete_record(self, program, project, uuid):
+ """
Delete a record from a project.
Args:
@@ -257,8 +285,11 @@ Source code for gen3.submission
"""
return self.delete_records(program, project, [uuid])
-[docs] def delete_records(self, program, project, uuids, batch_size=100):
- """
+
+
+[docs]
+ def delete_records(self, program, project, uuids, batch_size=100):
+ """
Delete a list of records from a project.
Args:
@@ -294,8 +325,11 @@ Source code for gen3.submission
raise
return output
-[docs] def delete_node(self, program, project, node_name, batch_size=100, verbose=True):
- """
+
+
+[docs]
+ def delete_node(self, program, project, node_name, batch_size=100, verbose=True):
+ """
Delete all records for a node from a project.
Args:
@@ -314,10 +348,13 @@ Source code for gen3.submission
program, project, [node_name], batch_size, verbose=verbose
)
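
A sketch of the batched deletion helpers above (delete_records and delete_node); the program, project, UUIDs, and node name are placeholders:

    from gen3.auth import Gen3Auth
    from gen3.submission import Gen3Submission

    auth = Gen3Auth(refresh_file="credentials.json")  # hypothetical API key file
    sub = Gen3Submission(auth_provider=auth)

    # Delete specific records; uuids are sent in batches of batch_size.
    sub.delete_records("DEV", "test", ["<uuid-1>", "<uuid-2>"], batch_size=100)

    # Or delete every record of one node type, paging through ids internally.
    sub.delete_node("DEV", "test", "aliquot")  # hypothetical node name
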
-[docs] def delete_nodes(
+
+
+[docs]
+ def delete_nodes(
self, program, project, ordered_node_list, batch_size=100, verbose=True
):
- """
+ """
Delete all records for a list of nodes from a project.
Args:
@@ -338,10 +375,10 @@ Source code for gen3.submission
print(node, end="", flush=True)
first_uuid = ""
while True:
- query_string = f"""{{
- {node} (first: {batch_size}, project_id: "{project_id}") {{
+ query_string = f"""{{
+ {node} (first: {batch_size}, project_id: "{project_id}") {{
id
- }}
+ }}
}}"""
res = self.query(query_string)
uuids = [x["id"] for x in res["data"][node]]
@@ -356,8 +393,11 @@ Source code for gen3.submission
if verbose:
print()
-[docs] def export_record(self, program, project, uuid, fileformat, filename=None):
- """Export a single record into json.
+
+
+[docs]
+ def export_record(self, program, project, uuid, fileformat, filename=None):
+ """Export a single record into json.
Args:
program (str): The program the record is under.
@@ -392,8 +432,11 @@ Source code for gen3.submission
self.__export_file(filename, output)
return output
-[docs] def export_node(self, program, project, node_type, fileformat, filename=None):
- """Export all records in a single node type of a project.
+
+
+[docs]
+ def export_node(self, program, project, node_type, fileformat, filename=None):
+ """Export all records in a single node type of a project.
Args:
program (str): The program to which records belong.
@@ -428,10 +471,13 @@ Source code for gen3.submission
self.__export_file(filename, output)
return output
+
### Query functions
-[docs] def query(self, query_txt, variables=None, max_tries=1):
- """Execute a GraphQL query against a Data Commons.
+
+[docs]
+ def query(self, query_txt, variables=None, max_tries=1):
+ """Execute a GraphQL query against a Data Commons.
Args:
query_txt (str): Query text.
@@ -468,8 +514,11 @@ Source code for gen3.submission
return data
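
A hedged example of the query helper above; the node name, fields, and project_id value depend entirely on the commons' dictionary:

    from gen3.auth import Gen3Auth
    from gen3.submission import Gen3Submission

    auth = Gen3Auth(refresh_file="credentials.json")  # hypothetical API key file
    sub = Gen3Submission(auth_provider=auth)

    # Peregrine-style GraphQL; "case" and "DEV-test" are illustrative.
    query_txt = '{ case(first: 10, project_id: "DEV-test") { id submitter_id } }'
    data = sub.query(query_txt)
    print(data["data"]["case"])
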
-[docs] def get_graphql_schema(self):
- """Returns the GraphQL schema for a commons.
+
+
+[docs]
+ def get_graphql_schema(self):
+ """Returns the GraphQL schema for a commons.
This runs the GraphQL introspection query against a commons and returns the results.
@@ -484,10 +533,13 @@ Source code for gen3.submission
data = json.loads(output)
return data
+
### Dictionary functions
-[docs] def get_dictionary_node(self, node_type):
- """Returns the dictionary schema for a specific node.
+
+[docs]
+ def get_dictionary_node(self, node_type):
+ """Returns the dictionary schema for a specific node.
This gets the current json dictionary schema for a specific node type in a commons.
@@ -507,8 +559,11 @@ Source code for gen3.submission
data = json.loads(output)
return data
-[docs] def get_dictionary_all(self):
- """Returns the entire dictionary object for a commons.
+
+
+[docs]
+ def get_dictionary_all(self):
+ """Returns the entire dictionary object for a commons.
This gets a json of the current dictionary schema for a commons.
@@ -520,10 +575,13 @@ Source code for gen3.submission
"""
return self.get_dictionary_node("_all")
+
### File functions
-[docs] def get_project_manifest(self, program, project):
- """Get a projects file manifest
+
+[docs]
+ def get_project_manifest(self, program, project):
+        """Get a project's file manifest
Args:
program: the name of the program the project is from
@@ -538,8 +596,11 @@ Source code for gen3.submission
output = requests.get(api_url, auth=self._auth_provider)
return output
-[docs] def submit_file(self, project_id, filename, chunk_size=30, row_offset=0):
- """Submit data in a spreadsheet file containing multiple records in rows to a Gen3 Data Commons.
+
+
+[docs]
+ def submit_file(self, project_id, filename, chunk_size=30, row_offset=0):
+ """Submit data in a spreadsheet file containing multiple records in rows to a Gen3 Data Commons.
Args:
project_id (str): The project_id to submit to.
@@ -601,7 +662,6 @@ Source code for gen3.submission
# Start the chunking loop:
while (start + len(chunk)) <= len(df):
-
timeout = False
valid_but_failed = []
invalid = []
@@ -633,7 +693,6 @@ Source code for gen3.submission
or "Connection aborted." in response
or "service failure - try again later" in response
): # time-out, response is not valid JSON at the moment
-
print("\t Reducing Chunk Size: {}".format(response))
results["responses"].append("Reducing Chunk Size: {}".format(response))
timeout = True
@@ -665,7 +724,6 @@ Source code for gen3.submission
)
elif json_res["code"] == 200: # success
-
entities = json_res.get("entities", [])
print("\t Succeeded: {} entities.".format(len(entities)))
results["responses"].append(
@@ -677,14 +735,12 @@ Source code for gen3.submission