From 7ba3251b7b7839d472fc8e48a49aff4cce55a80c Mon Sep 17 00:00:00 2001 From: Nikolai Sviridov Date: Thu, 7 Dec 2023 14:15:13 +0100 Subject: [PATCH] feat: update readme, change folders, increase sleep time --- README.md | 107 ++++++++++++++++-- .../main/kotlin/loader/gerrit/GerritLoader.kt | 2 +- .../kotlin/loader/gerrit/LoaderChanges.kt | 4 +- 3 files changed, 101 insertions(+), 12 deletions(-) diff --git a/README.md b/README.md index 522db50..f1e6cc2 100644 --- a/README.md +++ b/README.md @@ -1,22 +1,111 @@ # MR-loader +> [!IMPORTANT] +> Crucial information. The tool is fully dependent on server responses. Servers may differ and change over time. +> If you encounter errors in the results, please report them. + Tool for loading merge requests from Gerrit. Github functionality will be added later. +MR-loader workflow: + +* Identify `maxId` (the largest existing pull request ID). This is done by requesting a batch of the most recent pull + requests. +* Iterate over all integers up to `maxId` and send requests via the REST API to load all required information about the code + change with the specified ID. + Unmodified responses from the server are stored in the `work_dir/gerrit/url_dir/changes` directory, where each file contains a map with + the following structure: `map[changeID] = jsonResponse`. +* Iterate over all loaded changes in `work_dir/gerrit/url_dir/changes` and load comments, if needed, + to `work_dir/gerrit/url_dir/comments`. + Files in `work_dir/gerrit/url_dir/comments` contain a map with the following structure: `map[changeID] = jsonResponse`. +* Iterate over all loaded changes in `work_dir/gerrit/url_dir/changes` and comments + in `work_dir/gerrit/url_dir/comments` to create a dataset in + `work_dir/dataset/gerrit/` + ## How to use -### CLI +### Docker -At the moment, CLI can be used for loading from Gerrit. +You can use the tool as a CLI via Docker. -1. Run `./gradlew :cli:shadowJar` -2. Now you can use shell script to use cli `./run.sh` +First, choose a working folder for storing all the results. 
You can set it by mounting a volume to `/root` of the Docker +container: +`--volume ~/your/folder/path:/root`. After that, call the `GerritLoad` command with arguments. You can get the list of +arguments via: -The script should be executed as: -```shell script -sh ./run.sh GerritLoad options arguments +```shell script +docker run --volume ~/your/folder/path:/root -it ghcr.io/jetbrains-research/mr-loader/mr-loader:latest GerritLoad -h ``` -To get more info about options: +Example run to load all available changes from http://review.openstack.org : + ```shell script -sh ./run.sh GerritLoad -h +docker run --volume ~/your/folder/path:/root -it ghcr.io/jetbrains-research/mr-loader/mr-loader:latest GerritLoad --url http://review.openstack.org ``` + +## Dataset format + +All dataset parts are stored in `work_dir/dataset/gerrit/` + +### Changes + +Stored in `work_dir/dataset/gerrit/changes`. + +| created_at | number | key_user | status | comment | key_change | updated_time | subject | +|------------|--------|----------|--------|---------|------------|--------------|---------| +| ... | ... | ... | ... | ... | ... | ... | ... | + +### Changes files + +Stored in `work_dir/dataset/gerrit/changes_files` + +| key_change | key_file | +|------------|----------| +| ... | ... | + +### Changes reviewer + +Stored in `work_dir/dataset/gerrit/changes_files` + +| key_change | key_user | +|------------|----------| +| ... | ... | + +### Commits + +Stored in `work_dir/dataset/gerrit/commits` + +| oid | committed_date | key_commit | key_change | +|-----|----------------|------------|------------| +| ... | ... | ... | ... | + +### Commits author + +Stored in `work_dir/dataset/gerrit/commits_author` + +| key_commit | author_key_user | committer_key_user | uploader_key_user | +|------------|-----------------|--------------------|-------------------| +| ... | ... | ... | ... 
| + +### Commits file + +Stored in `work_dir/dataset/gerrit/commits_file` + +| key_commit | key_file | lines_inserted | lines_deleted | size | size_delta | status | +|------------|----------|----------------|----------------|-------|------------|--------| +| ... | ... | ... | ... | ... | ... | ... | + +### Files + +Stored in `work_dir/dataset/gerrit/files` + +| path | key | +|------|-----| +| ... | ... | + +### Users + +Stored in `work_dir/dataset/gerrit/users` + +| name | email | login | +|------|-------|-------| +| ... | ... | ... | diff --git a/core/src/main/kotlin/loader/gerrit/GerritLoader.kt b/core/src/main/kotlin/loader/gerrit/GerritLoader.kt index c4f54b0..1b26cc5 100644 --- a/core/src/main/kotlin/loader/gerrit/GerritLoader.kt +++ b/core/src/main/kotlin/loader/gerrit/GerritLoader.kt @@ -31,7 +31,7 @@ class GerritLoader( private val datasetUrlDir = File(resultsDir, "dataset/gerrit/${baseUrlToDomain(baseUrl)}") private val writerProvider = WriterProvider() - private val urlDir = File(resultsDir, "changes/gerrit/${baseUrlToDomain(baseUrl)}") + private val urlDir = File(resultsDir, "gerrit/${baseUrlToDomain(baseUrl)}") suspend fun run( ignoreLoad: Boolean = false, diff --git a/core/src/main/kotlin/loader/gerrit/LoaderChanges.kt b/core/src/main/kotlin/loader/gerrit/LoaderChanges.kt index 6a9d0f4..a5da80d 100644 --- a/core/src/main/kotlin/loader/gerrit/LoaderChanges.kt +++ b/core/src/main/kotlin/loader/gerrit/LoaderChanges.kt @@ -243,8 +243,8 @@ class LoaderChanges( } errMsg.contains("429") -> { - logger.warning("$msg : Sleep for 1 minute. Error message contains 429.") - Thread.sleep(60_000) + logger.warning("$msg : Sleep for 5 minute. Error message contains 429.") + Thread.sleep(5 * 60_000) continue }