Skip to content

Commit

Permalink
Merge pull request #58 from ropensci-review-tools/user-connections
Browse files Browse the repository at this point in the history
start 'R/analyse-users.R'
  • Loading branch information
mpadge authored Dec 16, 2024
2 parents bcd75d8 + 61e651d commit a234d33
Show file tree
Hide file tree
Showing 10 changed files with 237 additions and 4 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: repometrics
Title: Metrics for Your Code Repository
Version: 0.1.3.020
Version: 0.1.3.031
Authors@R:
person("Mark", "Padgham", , "[email protected]", role = c("aut", "cre"),
comment = c(ORCID = "0000-0003-2172-5265"))
Expand Down
135 changes: 135 additions & 0 deletions R/analyse-users.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,135 @@
#' Construct user-by-user square matrices of strengths of relation between
#' users.
#'
#' @param user_data Result of `lapply(logins, repometrics_data_user)`.
#' Contains the following fields:
#' \enumerate{
#' \item general (not considered here)
#' \item commit_cmt Comments on commits
#' \item commits Commits to different repositories
#' \item followers GitHub followers
#' \item following Logins of people/orgs followed by user on GitHub
#' \item issue_cmts Comments on issues
#' \item issues Issues opened by user.
#' }
#' @return A `data.frame` of pairwise user logins, and proportions of overlap
#' between repositories in the six variables described above.
#' @noRd
user_relation_matrices <- function (user_data) {

    user_names <- names (user_data)
    user_data <- add_user_login_cols (user_data) |>
        combine_user_data ()

    # Pre-processing to name grouping column "repo" and count column "n":
    # NOTE(review): 'org' and 'repo' are concatenated without a separator, so
    # distinct org/repo pairs could in principle yield the same string.
    # Confirm whether a "/" separator is intended here.
    user_data$commit_cmt$repo <-
        paste0 (user_data$commit_cmt$org, user_data$commit_cmt$repo)

    # Followers and following have no counts; each entry is given n = 1.
    user_data$followers <-
        dplyr::rename (user_data$followers, repo = followers) |>
        dplyr::mutate (n = 1L)
    user_data$following <-
        dplyr::rename (user_data$following, repo = following) |>
        dplyr::mutate (n = 1L)

    user_data$issue_cmts <-
        dplyr::rename (user_data$issue_cmts, repo = org_repo) |>
        dplyr::group_by (repo, login) |>
        dplyr::summarise (n = sum (num_comments), .groups = "keep")
    user_data$issues <- dplyr::rename (user_data$issues, repo = org_repo) |>
        dplyr::group_by (repo, login) |>
        dplyr::summarise (n = dplyr::n (), .groups = "keep")

    # One table of pairwise overlaps per field of 'user_data':
    overlap <- lapply (names (user_data), function (n) {
        user_relate_fields (user_data, user_names, what = n)
    })

    # Fold all tables together on the two login columns. This works for any
    # number of fields rather than being tied to exactly six tables.
    res <- Reduce (
        function (x, y) {
            dplyr::left_join (x, y, by = c ("login1", "login2"))
        },
        overlap
    )

    return (res)
}

#' Add 'login' columns to all user data, so each element can be combined.
#'
#' @param user_data Named list with one element per user; each element is
#' itself a named list of data fields. Names of the outer list are used as the
#' login values.
#' @return A list of the same structure and names, in which non-empty
#' `data.frame` fields gain a `login` column, and character vector fields are
#' converted to two-column `data.frame` objects holding the original values
#' (named after the field) and `login`. All other fields are returned
#' unchanged.
#' @noRd
add_user_login_cols <- function (user_data) {

    nms <- names (user_data)
    res <- lapply (seq_along (user_data), function (u) {
        login_u <- nms [u]
        nms_u <- names (user_data [[u]])
        res_u <- lapply (seq_along (user_data [[u]]), function (i) {
            ud <- user_data [[u]] [[i]]
            if (is.data.frame (ud) && nrow (ud) > 0L) {
                ud$login <- login_u
            } else if (is.character (ud)) {
                # Size 'login' explicitly to the length of 'ud': data.frame()
                # cannot recycle a length-one column against a zero-length
                # vector, so an empty field would otherwise raise an error.
                ud <- data.frame (ud, login = rep (login_u, length (ud)))
                names (ud) [1] <- nms_u [i]
            }
            return (ud)
        })
        names (res_u) <- nms_u

        return (res_u)
    })
    names (res) <- nms

    return (res)
}

#' Bind the per-user data fields into one object per field.
#'
#' Relies on `add_user_login_cols` having been applied first, so that the
#' rows contributed by each user remain identifiable after `rbind`.
#'
#' @param user_data Named list with one element per user, each a named list
#' of data fields. Field names are taken from the first user.
#' @return Named list with one row-bound element per field, without the
#' "general" field.
#' @noRd
combine_user_data <- function (user_data) {

    field_names <- names (user_data [[1]])

    combined <- lapply (field_names, function (f) {
        bound <- do.call (rbind, lapply (user_data, `[[`, f))
        # Discard the row names which 'rbind' derives from the user names:
        rownames (bound) <- NULL
        bound
    })
    names (combined) <- field_names

    combined [["general"]] <- NULL

    combined
}

#' Calculate pairwise overlap between users for one field of user data.
#'
#' @param user_data Named list of combined user data. The element named by
#' `what` is filtered on a `login` column, grouped by a `repo` column, and
#' summed over a count column `n` (created here from `num_commits` for
#' "commits", and set to 1 for "commit_cmt").
#' @param user_names Character vector of user logins from which all pairs are
#' formed.
#' @param what Name of the element of `user_data` to analyse.
#' @return A `data.frame` with one row per pair of users, and columns `login1`,
#' `login2`, and one column named by `what`. That column holds, for each pair,
#' the sum of both users' counts in repos common to both, divided by the sum
#' of all counts of both users; or 0 where no repos are shared. Fewer than two
#' users gives a `data.frame` with those columns and no rows.
#' @noRd
user_relate_fields <- function (user_data, user_names, what = "commits") {

    # 'combn()' errors when asked for pairs from fewer than two items, so
    # return an empty result of the expected shape instead.
    if (length (user_names) < 2L) {
        res <- data.frame (
            login1 = character (0L),
            login2 = character (0L),
            res = numeric (0L)
        )
        names (res) [3] <- what
        return (res)
    }

    user_combs <- t (combn (user_names, m = 2L))
    if (what == "commits") {
        user_data [[what]] <-
            dplyr::rename (user_data [[what]], n = num_commits)
    } else if (what == "commit_cmt") {
        user_data$commit_cmt$n <- 1L
    }

    res <- apply (user_combs, 1, function (i) {
        # Total counts per repo for each of the two users:
        cmt1 <- dplyr::filter (user_data [[what]], login == i [1]) |>
            dplyr::group_by (repo) |>
            dplyr::summarise (n1 = sum (n))
        cmt2 <- dplyr::filter (user_data [[what]], login == i [2]) |>
            dplyr::group_by (repo) |>
            dplyr::summarise (n2 = sum (n))
        # Only repos present for both users:
        overlap <- dplyr::inner_join (cmt1, cmt2, by = "repo")

        res <- 0
        if (nrow (overlap) > 0L) {
            res <- (sum (overlap$n1) + sum (overlap$n2)) /
                (sum (cmt1$n1) + sum (cmt2$n2))
        }
        return (res)
    })

    res <- data.frame (
        login1 = user_combs [, 1],
        login2 = user_combs [, 2],
        res
    )
    names (res) [3] <- what

    return (res)
}
4 changes: 2 additions & 2 deletions R/data-gh-user.R
Original file line number Diff line number Diff line change
Expand Up @@ -87,8 +87,8 @@ gh_user_general_internal <- function (login = "",
name = org_name,
gh_org = org_gh_org,
url = org_url,
web_url = org_web_url,
location = org_location,
web_url = null2na_char (org_web_url),
location = null2na_char (org_location),
num_members = org_num_members
)

Expand Down
2 changes: 1 addition & 1 deletion codemeta.json
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
"codeRepository": "https://github.com/ropensci-review-tools/repometrics",
"issueTracker": "https://github.com/ropensci-review-tools/repometrics/issues",
"license": "https://spdx.org/licenses/GPL-3.0",
"version": "0.1.3.020",
"version": "0.1.3.031",
"programmingLanguage": {
"@type": "ComputerLanguage",
"name": "R",
Expand Down
82 changes: 82 additions & 0 deletions tests/testthat/helper-user-relations.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
# Mock version of the data for one user, as constructed in data-gh-user.R.
# Follower and following logins are random strings; everything else is fixed.
mock_user_rel_data <- function () {

    # Values shared between the mock tables:
    timestamp <- as.POSIXct ("2024-01-01T00:00:01")
    timestamp_minus_year <- as.POSIXct ("2023-01-01T00:00:01")
    stamps <- rep (timestamp, 2L)
    repos <- paste0 ("org", c ("one", "two"))

    user <- data.frame (
        login = "me",
        name = "me too",
        email = "[email protected]",
        location = "somewhere",
        company = "noway",
        bio = NA_character_,
        avatarUrl = NA_character_,
        num_repositories = 1L,
        repos_contributed_to = 2L,
        num_starred_repos = 3L
    )
    orgs <- data.frame (
        name = "org",
        gh_org = "org",
        url = "https://github.com/org",
        web_url = NA_character_,
        location = NA_character_,
        num_members = 0L
    )

    # Random strings of upper- and lower-case letters to serve as logins:
    randchars <- function (len = 6L) {
        chars <- sample (c (letters, LETTERS), size = len, replace = TRUE)
        paste0 (chars, collapse = "")
    }
    followers <-
        vapply (seq_len (10L), function (i) randchars (), character (1L))
    following <-
        vapply (seq_len (5L), function (i) randchars (), character (1L))

    commits <- data.frame (
        repo = repos,
        num_commits = 1:2,
        date = stamps
    )

    commit_cmt <- data.frame (
        repo = repos,
        num_commits = 1:2,
        date = stamps
    )
    attr (commit_cmt, "started_at") <- timestamp_minus_year
    attr (commit_cmt, "ended_at") <- timestamp

    issues <- data.frame (
        opened_at = stamps,
        closed_at = stamps,
        org_repo = repos,
        issue_num = 1:2,
        num_issue_comments = 3:4,
        num_issue_participants = 5:6,
        num_repo_languages = 7:8,
        repo_languages = I (c ("R", "C"))
    )
    attr (issues, "started_at") <- timestamp_minus_year
    attr (issues, "ended_at") <- timestamp

    issue_cmts <- data.frame (
        org_repo = repos,
        issue_num = 1:2,
        created_at = stamps,
        num_comments = 1:2,
        num_participants = 3:4
    )

    # Assemble everything in the order of the real user data:
    list (
        general = list (user = user, orgs = orgs),
        commit_cmt = commit_cmt,
        commits = commits,
        followers = followers,
        following = following,
        issue_cmts = issue_cmts,
        issues = issues
    )
}
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
16 changes: 16 additions & 0 deletions tests/testthat/test-data-user.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
test_that ("user data matrices", {

    # Two mock users, named "a" and "b":
    user_data <- lapply (1:2, function (i) mock_user_rel_data ())
    names (user_data) <- c ("a", "b")

    mats <- user_relation_matrices (user_data)

    expect_s3_class (mats, "data.frame")
    expect_equal (ncol (mats), 8L)
    nms <- c (
        "login1", "login2", "commit_cmt", "commits", "followers", "following",
        "issue_cmts", "issues"
    )
    expect_equal (names (mats), nms)
    # Two users form exactly one pair, so exactly one row is expected:
    expect_equal (nrow (mats), 1L)
    expect_equal (mats$login1, "a")
    expect_equal (mats$login2, "b")
})

0 comments on commit a234d33

Please sign in to comment.