From 44eb6ea64f6f104d7fd9d880d78024c44b135f86 Mon Sep 17 00:00:00 2001 From: jhnwllr Date: Thu, 4 Jul 2024 12:17:01 +0000 Subject: [PATCH 1/3] Apply automatic changes --- 250-private-sector-table.csv | 2 +- 260-private-sector-totals.csv | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/250-private-sector-table.csv b/250-private-sector-table.csv index 596a886..6daadca 100644 --- a/250-private-sector-table.csv +++ b/250-private-sector-table.csv @@ -8,7 +8,7 @@ https://www.gbif.org/publisher/612c9b58-e739-4af4-a038-4b3901fa5649[Asplan Viak https://www.gbif.org/publisher/e62a5313-e771-4c81-b6d1-cba6e4085635[Aures Bajo],{Energy},{CO},2,368,47 https://www.gbif.org/publisher/83500190-21b6-445c-ab2c-c0565fc0afce[Awake Travel],{Consulting},{CO},1,8 644,21 https://www.gbif.org/publisher/eea64f26-8fd5-49fb-be7e-a1d4cfc051ee[Aïgos SAS],{Consulting},{CO},3,2 404,48 -https://www.gbif.org/publisher/b2c1126d-e3b4-4619-9f94-b236dcc0a947[Biofokus],{Consulting},{NO},1,670 653,1 133 +https://www.gbif.org/publisher/b2c1126d-e3b4-4619-9f94-b236dcc0a947[Biofokus],{Consulting},{NO},1,444 289,1 133 https://www.gbif.org/publisher/a41046bd-eaca-49bf-919b-419062ffc2a2[Biolog J.B. Jordal AS],{Consulting},{NO},1,177 814,684 https://www.gbif.org/publisher/8e6bc843-c1b4-4b10-b546-881f06049004[Biotica Consultores Ltda],{Consulting},{CO},4,1 318,219 https://www.gbif.org/publisher/14fb9c57-68a5-4870-b434-5355df7a9c3c[Carbones del Cerrejón Limited],{Materials},{CO},9,197 100,279 diff --git a/260-private-sector-totals.csv b/260-private-sector-totals.csv index 34f2048..226aca2 100644 --- a/260-private-sector-totals.csv +++ b/260-private-sector-totals.csv @@ -1 +1 @@ -595,9 606 840,10 745 +595,9 380 476,10 745 From bface135b3e41fdf66cdbdb9a1bbd53f37af7c79 Mon Sep 17 00:00:00 2001 From: Matthew Blissett Date: Mon, 15 Jul 2024 10:53:28 +0200 Subject: [PATCH 2/3] Exclude .history. --- .gitignore | 1 + .../.github/workflows/run_20240703144031.yml | 47 -------- .../.github/workflows/run_20240703144457.yml | 47 -------- .../.github/workflows/run_20240703144505.yml | 47 -------- ...0-private-sector-totals_20240704134811.csv | 1 - ...0-private-sector-totals_20240704134910.csv | 1 - ...0-private-sector-totals_20240704134934.csv | 1 - .../R/table_20240703144031.R | 74 ------------ .../R/table_20240703145945.R | 74 ------------ .../R/table_20240703150052.R | 74 ------------ .../R/table_20240703150534.R | 77 ------------- .../R/table_20240703150544.R | 80 ------------- .../R/table_20240703150553.R | 80 ------------- .../R/table_20240703150627.R | 80 ------------- .../R/table_20240703150636.R | 78 ------------- .../R/table_20240703150902.R | 80 ------------- .../R/table_20240703150914.R | 81 ------------- .../R/table_20240703150928.R | 81 ------------- .../R/table_20240703150958.R | 79 ------------- .../R/table_20240703151048.R | 80 ------------- .../R/table_20240703151213.R | 83 -------------- .../R/table_20240703151220.R | 83 -------------- .../R/table_20240703151245.R | 83 -------------- .../R/table_20240703151332.R | 84 -------------- .../R/table_20240703151345.R | 84 -------------- .../R/table_20240703151359.R | 85 -------------- .../R/table_20240703151427.R | 86 -------------- .../R/table_20240703151446.R | 86 -------------- .../R/table_20240703151447.R | 86 -------------- .../R/table_20240703151649.R | 86 -------------- .../R/table_20240703151658.R | 84 -------------- .../R/table_20240703151756.R | 86 -------------- .../R/table_20240703151802.R | 86 -------------- .../R/table_20240703152128.R | 86 -------------- .../R/table_20240703152158.R | 87 -------------- .../R/table_20240703152200.R | 87 -------------- .../R/table_20240703152202.R | 87 -------------- .../R/table_20240703152223.R | 89 --------------- .../R/table_20240703152232.R | 89 --------------- .../R/table_20240703152251.R | 89 --------------- .../R/table_20240704101656.R | 89 --------------- .../R/table_20240704101756.R | 89 --------------- .../R/table_20240704102106.R | 90 --------------- .../R/table_20240704102116.R | 90 --------------- .../R/table_20240704102119.R | 90 --------------- .../R/table_20240704102126.R | 90 --------------- .../R/table_20240704102131.R | 90 --------------- .../R/table_20240704102134.R | 92 --------------- .../R/table_20240704102142.R | 93 --------------- .../R/table_20240704102212.R | 94 --------------- .../R/table_20240704102214.R | 94 --------------- .../R/table_20240704102217.R | 94 --------------- .../R/table_20240704102229.R | 94 --------------- .../R/table_20240704102746.R | 94 --------------- .../R/table_20240704102752.R | 95 --------------- .../R/table_20240704102957.R | 95 --------------- .../R/table_20240704103001.R | 95 --------------- .../R/table_20240704103045.R | 96 ---------------- .../R/table_20240704103144.R | 96 ---------------- .../R/table_20240704103155.R | 96 ---------------- .../R/table_20240704103541.R | 86 -------------- .../R/table_20240704103554.R | 92 --------------- .../R/table_20240704103618.R | 92 --------------- .../R/table_20240704103624.R | 92 --------------- .../R/table_20240704103627.R | 93 --------------- .../R/table_20240704103830.R | 94 --------------- .../R/table_20240704103834.R | 94 --------------- .../R/table_20240704103835.R | 94 --------------- .../R/table_20240704103850.R | 95 --------------- .../R/table_20240704103908.R | 95 --------------- .../R/table_20240704104421.R | 96 ---------------- .../R/table_20240704104427.R | 97 ---------------- .../R/table_20240704104428.R | 97 ---------------- .../R/table_20240704104452.R | 97 ---------------- .../R/table_20240704104500.R | 99 ---------------- .../R/table_20240704104510.R | 99 ---------------- .../R/table_20240704104514.R | 99 ---------------- .../R/table_20240704104654.R | 99 ---------------- .../R/table_20240704104703.R | 99 ---------------- .../R/table_20240704104717.R | 100 ---------------- .../R/table_20240704104745.R | 100 ---------------- .../R/table_20240704104816.R | 102 ----------------- .../R/table_20240704104837.R | 102 ----------------- .../R/table_20240704104841.R | 102 ----------------- .../R/table_20240704104853.R | 103 ----------------- .../R/table_20240704105458.R | 103 ----------------- .../R/table_20240704105503.R | 103 ----------------- .../R/table_20240704105640.R | 103 ----------------- .../R/table_20240704111427.R | 103 ----------------- .../R/table_20240704111524.R | 103 ----------------- .../R/table_20240704111545.R | 104 ----------------- .../R/table_20240704111714.R | 104 ----------------- .../R/table_20240704111733.R | 104 ----------------- .../R/table_20240704111811.R | 104 ----------------- .../R/table_20240704111831.R | 104 ----------------- .../R/table_20240704111841.R | 104 ----------------- .../R/table_20240704111913.R | 104 ----------------- .../R/table_20240704111920.R | 104 ----------------- .../R/table_20240704112001.R | 97 ---------------- .../R/table_20240704112112.R | 97 ---------------- .../R/table_20240704112238.R | 97 ---------------- .../R/table_20240704112450.R | 97 ---------------- .../R/table_20240704112520.R | 97 ---------------- .../R/table_20240704112833.R | 97 ---------------- .../R/table_20240704112837.R | 97 ---------------- .../R/table_20240704113137.R | 99 ---------------- .../R/table_20240704113144.R | 99 ---------------- .../R/table_20240704113150.R | 99 ---------------- .../R/table_20240704113202.R | 99 ---------------- .../R/table_20240704113250.R | 99 ---------------- .../R/table_20240704113255.R | 99 ---------------- .../R/table_20240704113311.R | 99 ---------------- .../R/table_20240704113316.R | 99 ---------------- .../R/table_20240704113434.R | 100 ---------------- .../R/table_20240704113512.R | 97 ---------------- .../R/table_20240704114121.R | 99 ---------------- .../R/table_20240704114454.R | 99 ---------------- .../R/table_20240704114458.R | 99 ---------------- .../R/table_20240704114512.R | 101 ---------------- .../R/table_20240704114648.R | 103 ----------------- .../R/table_20240704115542.R | 108 ------------------ .../R/table_20240704115545.R | 108 ------------------ .../R/table_20240704120053.R | 108 ------------------ .../R/table_20240704120141.R | 106 ----------------- .../R/table_20240704120227.R | 106 ----------------- .../R/table_20240704120233.R | 106 ----------------- .../R/table_20240704120236.R | 106 ----------------- .../R/table_20240704120453.R | 106 ----------------- .../R/table_20240704120713.R | 106 ----------------- .../R/table_20240704120716.R | 106 ----------------- .../R/table_20240704120741.R | 103 ----------------- .../R/table_20240704120832.R | 106 ----------------- .../R/table_20240704120836.R | 105 ----------------- .../R/table_20240704120941.R | 106 ----------------- .../R/table_20240704120943.R | 106 ----------------- .../R/table_20240704120945.R | 106 ----------------- .../R/table_20240704121033.R | 106 ----------------- .../R/table_20240704121414.R | 108 ------------------ .../R/table_20240704121416.R | 108 ------------------ .../R/table_20240704121422.R | 108 ------------------ .../R/table_20240704121423.R | 108 ------------------ .../R/table_20240704121632.R | 108 ------------------ .../R/table_20240704121652.R | 106 ----------------- .../R/table_20240704121703.R | 106 ----------------- .../R/table_20240704121810.R | 106 ----------------- .../R/table_20240704122005.R | 108 ------------------ .../R/table_20240704122024.R | 108 ------------------ .../R/table_20240704122025.R | 108 ------------------ .../R/table_20240704122031.R | 108 ------------------ .../R/table_20240704122102.R | 107 ----------------- .../R/table_20240704122152.R | 106 ----------------- .../R/table_20240704122154.R | 106 ----------------- .../R/table_20240704122352.R | 106 ----------------- .../R/table_20240704122524.R | 105 ----------------- .../R/table_20240704122533.R | 106 ----------------- .../R/table_20240704122610.R | 106 ----------------- .../R/table_20240704122910.R | 106 ----------------- .../R/table_20240704123007.R | 106 ----------------- .../R/table_20240704123011.R | 106 ----------------- .../R/table_20240704123013.R | 106 ----------------- .../R/table_20240704123019.R | 106 ----------------- .../R/table_20240704123025.R | 106 ----------------- .../R/table_20240704123153.R | 106 ----------------- .../R/table_20240704123229.R | 99 ---------------- .../R/table_20240704123242.R | 100 ---------------- .../R/table_20240704123252.R | 100 ---------------- .../R/table_20240704123438.R | 100 ---------------- .../R/table_20240704123439.R | 100 ---------------- .../R/table_20240704123755.R | 72 ------------ .../R/table_20240704123814.R | 101 ---------------- .../R/table_20240704123846.R | 101 ---------------- .../R/table_20240704131922.R | 96 ---------------- .../R/table_20240704131936.R | 96 ---------------- .../R/table_20240704131944.R | 96 ---------------- .../R/table_20240704131948.R | 93 --------------- .../R/table_20240704132011.R | 93 --------------- .../R/table_20240704132020.R | 93 --------------- .../R/table_20240704132616.R | 93 --------------- .../R/table_20240704132618.R | 93 --------------- .../R/table_20240704132737.R | 89 --------------- .../R/table_20240704132740.R | 88 -------------- .../R/table_20240704132746.R | 87 -------------- .../R/table_20240704132808.R | 83 -------------- .../R/table_20240704132810.R | 83 -------------- .../R/table_20240704133533.R | 83 -------------- .../R/table_20240704133603.R | 83 -------------- .../R/table_20240704134658.R | 82 ------------- .../R/table_20240704134713.R | 81 ------------- .../R/table_20240704134726.R | 81 ------------- .../R/table_20240704134736.R | 81 ------------- .../R/table_20240704134738.R | 81 ------------- .../R/table_20240704135018.R | 81 ------------- 192 files changed, 1 insertion(+), 17855 deletions(-) delete mode 100644 .history/.github/workflows/run_20240703144031.yml delete mode 100644 .history/.github/workflows/run_20240703144457.yml delete mode 100644 .history/.github/workflows/run_20240703144505.yml delete mode 100644 .history/260-private-sector-totals_20240704134811.csv delete mode 100644 .history/260-private-sector-totals_20240704134910.csv delete mode 100644 .history/260-private-sector-totals_20240704134934.csv delete mode 100644 .history/build-table-script/R/table_20240703144031.R delete mode 100644 .history/build-table-script/R/table_20240703145945.R delete mode 100644 .history/build-table-script/R/table_20240703150052.R delete mode 100644 .history/build-table-script/R/table_20240703150534.R delete mode 100644 .history/build-table-script/R/table_20240703150544.R delete mode 100644 .history/build-table-script/R/table_20240703150553.R delete mode 100644 .history/build-table-script/R/table_20240703150627.R delete mode 100644 .history/build-table-script/R/table_20240703150636.R delete mode 100644 .history/build-table-script/R/table_20240703150902.R delete mode 100644 .history/build-table-script/R/table_20240703150914.R delete mode 100644 .history/build-table-script/R/table_20240703150928.R delete mode 100644 .history/build-table-script/R/table_20240703150958.R delete mode 100644 .history/build-table-script/R/table_20240703151048.R delete mode 100644 .history/build-table-script/R/table_20240703151213.R delete mode 100644 .history/build-table-script/R/table_20240703151220.R delete mode 100644 .history/build-table-script/R/table_20240703151245.R delete mode 100644 .history/build-table-script/R/table_20240703151332.R delete mode 100644 .history/build-table-script/R/table_20240703151345.R delete mode 100644 .history/build-table-script/R/table_20240703151359.R delete mode 100644 .history/build-table-script/R/table_20240703151427.R delete mode 100644 .history/build-table-script/R/table_20240703151446.R delete mode 100644 .history/build-table-script/R/table_20240703151447.R delete mode 100644 .history/build-table-script/R/table_20240703151649.R delete mode 100644 .history/build-table-script/R/table_20240703151658.R delete mode 100644 .history/build-table-script/R/table_20240703151756.R delete mode 100644 .history/build-table-script/R/table_20240703151802.R delete mode 100644 .history/build-table-script/R/table_20240703152128.R delete mode 100644 .history/build-table-script/R/table_20240703152158.R delete mode 100644 .history/build-table-script/R/table_20240703152200.R delete mode 100644 .history/build-table-script/R/table_20240703152202.R delete mode 100644 .history/build-table-script/R/table_20240703152223.R delete mode 100644 .history/build-table-script/R/table_20240703152232.R delete mode 100644 .history/build-table-script/R/table_20240703152251.R delete mode 100644 .history/build-table-script/R/table_20240704101656.R delete mode 100644 .history/build-table-script/R/table_20240704101756.R delete mode 100644 .history/build-table-script/R/table_20240704102106.R delete mode 100644 .history/build-table-script/R/table_20240704102116.R delete mode 100644 .history/build-table-script/R/table_20240704102119.R delete mode 100644 .history/build-table-script/R/table_20240704102126.R delete mode 100644 .history/build-table-script/R/table_20240704102131.R delete mode 100644 .history/build-table-script/R/table_20240704102134.R delete mode 100644 .history/build-table-script/R/table_20240704102142.R delete mode 100644 .history/build-table-script/R/table_20240704102212.R delete mode 100644 .history/build-table-script/R/table_20240704102214.R delete mode 100644 .history/build-table-script/R/table_20240704102217.R delete mode 100644 .history/build-table-script/R/table_20240704102229.R delete mode 100644 .history/build-table-script/R/table_20240704102746.R delete mode 100644 .history/build-table-script/R/table_20240704102752.R delete mode 100644 .history/build-table-script/R/table_20240704102957.R delete mode 100644 .history/build-table-script/R/table_20240704103001.R delete mode 100644 .history/build-table-script/R/table_20240704103045.R delete mode 100644 .history/build-table-script/R/table_20240704103144.R delete mode 100644 .history/build-table-script/R/table_20240704103155.R delete mode 100644 .history/build-table-script/R/table_20240704103541.R delete mode 100644 .history/build-table-script/R/table_20240704103554.R delete mode 100644 .history/build-table-script/R/table_20240704103618.R delete mode 100644 .history/build-table-script/R/table_20240704103624.R delete mode 100644 .history/build-table-script/R/table_20240704103627.R delete mode 100644 .history/build-table-script/R/table_20240704103830.R delete mode 100644 .history/build-table-script/R/table_20240704103834.R delete mode 100644 .history/build-table-script/R/table_20240704103835.R delete mode 100644 .history/build-table-script/R/table_20240704103850.R delete mode 100644 .history/build-table-script/R/table_20240704103908.R delete mode 100644 .history/build-table-script/R/table_20240704104421.R delete mode 100644 .history/build-table-script/R/table_20240704104427.R delete mode 100644 .history/build-table-script/R/table_20240704104428.R delete mode 100644 .history/build-table-script/R/table_20240704104452.R delete mode 100644 .history/build-table-script/R/table_20240704104500.R delete mode 100644 .history/build-table-script/R/table_20240704104510.R delete mode 100644 .history/build-table-script/R/table_20240704104514.R delete mode 100644 .history/build-table-script/R/table_20240704104654.R delete mode 100644 .history/build-table-script/R/table_20240704104703.R delete mode 100644 .history/build-table-script/R/table_20240704104717.R delete mode 100644 .history/build-table-script/R/table_20240704104745.R delete mode 100644 .history/build-table-script/R/table_20240704104816.R delete mode 100644 .history/build-table-script/R/table_20240704104837.R delete mode 100644 .history/build-table-script/R/table_20240704104841.R delete mode 100644 .history/build-table-script/R/table_20240704104853.R delete mode 100644 .history/build-table-script/R/table_20240704105458.R delete mode 100644 .history/build-table-script/R/table_20240704105503.R delete mode 100644 .history/build-table-script/R/table_20240704105640.R delete mode 100644 .history/build-table-script/R/table_20240704111427.R delete mode 100644 .history/build-table-script/R/table_20240704111524.R delete mode 100644 .history/build-table-script/R/table_20240704111545.R delete mode 100644 .history/build-table-script/R/table_20240704111714.R delete mode 100644 .history/build-table-script/R/table_20240704111733.R delete mode 100644 .history/build-table-script/R/table_20240704111811.R delete mode 100644 .history/build-table-script/R/table_20240704111831.R delete mode 100644 .history/build-table-script/R/table_20240704111841.R delete mode 100644 .history/build-table-script/R/table_20240704111913.R delete mode 100644 .history/build-table-script/R/table_20240704111920.R delete mode 100644 .history/build-table-script/R/table_20240704112001.R delete mode 100644 .history/build-table-script/R/table_20240704112112.R delete mode 100644 .history/build-table-script/R/table_20240704112238.R delete mode 100644 .history/build-table-script/R/table_20240704112450.R delete mode 100644 .history/build-table-script/R/table_20240704112520.R delete mode 100644 .history/build-table-script/R/table_20240704112833.R delete mode 100644 .history/build-table-script/R/table_20240704112837.R delete mode 100644 .history/build-table-script/R/table_20240704113137.R delete mode 100644 .history/build-table-script/R/table_20240704113144.R delete mode 100644 .history/build-table-script/R/table_20240704113150.R delete mode 100644 .history/build-table-script/R/table_20240704113202.R delete mode 100644 .history/build-table-script/R/table_20240704113250.R delete mode 100644 .history/build-table-script/R/table_20240704113255.R delete mode 100644 .history/build-table-script/R/table_20240704113311.R delete mode 100644 .history/build-table-script/R/table_20240704113316.R delete mode 100644 .history/build-table-script/R/table_20240704113434.R delete mode 100644 .history/build-table-script/R/table_20240704113512.R delete mode 100644 .history/build-table-script/R/table_20240704114121.R delete mode 100644 .history/build-table-script/R/table_20240704114454.R delete mode 100644 .history/build-table-script/R/table_20240704114458.R delete mode 100644 .history/build-table-script/R/table_20240704114512.R delete mode 100644 .history/build-table-script/R/table_20240704114648.R delete mode 100644 .history/build-table-script/R/table_20240704115542.R delete mode 100644 .history/build-table-script/R/table_20240704115545.R delete mode 100644 .history/build-table-script/R/table_20240704120053.R delete mode 100644 .history/build-table-script/R/table_20240704120141.R delete mode 100644 .history/build-table-script/R/table_20240704120227.R delete mode 100644 .history/build-table-script/R/table_20240704120233.R delete mode 100644 .history/build-table-script/R/table_20240704120236.R delete mode 100644 .history/build-table-script/R/table_20240704120453.R delete mode 100644 .history/build-table-script/R/table_20240704120713.R delete mode 100644 .history/build-table-script/R/table_20240704120716.R delete mode 100644 .history/build-table-script/R/table_20240704120741.R delete mode 100644 .history/build-table-script/R/table_20240704120832.R delete mode 100644 .history/build-table-script/R/table_20240704120836.R delete mode 100644 .history/build-table-script/R/table_20240704120941.R delete mode 100644 .history/build-table-script/R/table_20240704120943.R delete mode 100644 .history/build-table-script/R/table_20240704120945.R delete mode 100644 .history/build-table-script/R/table_20240704121033.R delete mode 100644 .history/build-table-script/R/table_20240704121414.R delete mode 100644 .history/build-table-script/R/table_20240704121416.R delete mode 100644 .history/build-table-script/R/table_20240704121422.R delete mode 100644 .history/build-table-script/R/table_20240704121423.R delete mode 100644 .history/build-table-script/R/table_20240704121632.R delete mode 100644 .history/build-table-script/R/table_20240704121652.R delete mode 100644 .history/build-table-script/R/table_20240704121703.R delete mode 100644 .history/build-table-script/R/table_20240704121810.R delete mode 100644 .history/build-table-script/R/table_20240704122005.R delete mode 100644 .history/build-table-script/R/table_20240704122024.R delete mode 100644 .history/build-table-script/R/table_20240704122025.R delete mode 100644 .history/build-table-script/R/table_20240704122031.R delete mode 100644 .history/build-table-script/R/table_20240704122102.R delete mode 100644 .history/build-table-script/R/table_20240704122152.R delete mode 100644 .history/build-table-script/R/table_20240704122154.R delete mode 100644 .history/build-table-script/R/table_20240704122352.R delete mode 100644 .history/build-table-script/R/table_20240704122524.R delete mode 100644 .history/build-table-script/R/table_20240704122533.R delete mode 100644 .history/build-table-script/R/table_20240704122610.R delete mode 100644 .history/build-table-script/R/table_20240704122910.R delete mode 100644 .history/build-table-script/R/table_20240704123007.R delete mode 100644 .history/build-table-script/R/table_20240704123011.R delete mode 100644 .history/build-table-script/R/table_20240704123013.R delete mode 100644 .history/build-table-script/R/table_20240704123019.R delete mode 100644 .history/build-table-script/R/table_20240704123025.R delete mode 100644 .history/build-table-script/R/table_20240704123153.R delete mode 100644 .history/build-table-script/R/table_20240704123229.R delete mode 100644 .history/build-table-script/R/table_20240704123242.R delete mode 100644 .history/build-table-script/R/table_20240704123252.R delete mode 100644 .history/build-table-script/R/table_20240704123438.R delete mode 100644 .history/build-table-script/R/table_20240704123439.R delete mode 100644 .history/build-table-script/R/table_20240704123755.R delete mode 100644 .history/build-table-script/R/table_20240704123814.R delete mode 100644 .history/build-table-script/R/table_20240704123846.R delete mode 100644 .history/build-table-script/R/table_20240704131922.R delete mode 100644 .history/build-table-script/R/table_20240704131936.R delete mode 100644 .history/build-table-script/R/table_20240704131944.R delete mode 100644 .history/build-table-script/R/table_20240704131948.R delete mode 100644 .history/build-table-script/R/table_20240704132011.R delete mode 100644 .history/build-table-script/R/table_20240704132020.R delete mode 100644 .history/build-table-script/R/table_20240704132616.R delete mode 100644 .history/build-table-script/R/table_20240704132618.R delete mode 100644 .history/build-table-script/R/table_20240704132737.R delete mode 100644 .history/build-table-script/R/table_20240704132740.R delete mode 100644 .history/build-table-script/R/table_20240704132746.R delete mode 100644 .history/build-table-script/R/table_20240704132808.R delete mode 100644 .history/build-table-script/R/table_20240704132810.R delete mode 100644 .history/build-table-script/R/table_20240704133533.R delete mode 100644 .history/build-table-script/R/table_20240704133603.R delete mode 100644 .history/build-table-script/R/table_20240704134658.R delete mode 100644 .history/build-table-script/R/table_20240704134713.R delete mode 100644 .history/build-table-script/R/table_20240704134726.R delete mode 100644 .history/build-table-script/R/table_20240704134736.R delete mode 100644 .history/build-table-script/R/table_20240704134738.R delete mode 100644 .history/build-table-script/R/table_20240704135018.R diff --git a/.gitignore b/.gitignore index bda5836..327a2ec 100644 --- a/.gitignore +++ b/.gitignore @@ -17,3 +17,4 @@ sectors.??.txt *.mo *.icloud build.sh +.history diff --git a/.history/.github/workflows/run_20240703144031.yml b/.history/.github/workflows/run_20240703144031.yml deleted file mode 100644 index ddd0e8c..0000000 --- a/.history/.github/workflows/run_20240703144031.yml +++ /dev/null @@ -1,47 +0,0 @@ -on: - push: - schedule: - - cron: '0 0 1 * *' # every month - -jobs: - build: - runs-on: ubuntu-latest - name: run R script - - env: - GBIF_USER: ${{ secrets.GBIF_USER }} - GBIF_PWD: ${{ secrets.GBIF_PWD }} - - steps: - - uses: actions/checkout@v3 - - name: Setup R - uses: r-lib/actions/setup-r@v2 - with: - r-version: '4.2.2' - - run: Rscript -e 'print("hello")' - - - name: Cache R packages - uses: actions/cache@v3 - with: - path: ${{ env.R_LIBS_USER }} - key: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-2-${{ hashFiles('.github/depends.Rds') }} - restore-keys: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-2- - - - name: Install pak - run: | - install.packages("pak", repos = "https://r-lib.github.io/p/pak/devel/") - shell: Rscript {0} - - - name: Install dependencies - run: | - pak::pkg_install("jhnwllr/gbifmt") - pak::pkg_install(c("httr2","tibble","tidyr","rgbif","readr","purrr","dplyr", "ascii")) - shell: Rscript {0} - - - name: build table - run: Rscript build-table-script/R/table.R - - - name: update machine tags - run: Rscript build-table-script/R/mt.R - - - uses: stefanzweifel/git-auto-commit-action@v4 diff --git a/.history/.github/workflows/run_20240703144457.yml b/.history/.github/workflows/run_20240703144457.yml deleted file mode 100644 index 55d61cd..0000000 --- a/.history/.github/workflows/run_20240703144457.yml +++ /dev/null @@ -1,47 +0,0 @@ -on: - push: - schedule: - - cron: '0 0 1 * *' # every month - -jobs: - build: - runs-on: ubuntu-latest - name: run R script - - env: - GBIF_USER: ${{ secrets.GBIF_USER }} - GBIF_PWD: ${{ secrets.GBIF_PWD }} - - steps: - - uses: actions/checkout@v3 - - name: Setup R - uses: r-lib/actions/setup-r@v2 - with: - r-version: '4.2.2' - - run: Rscript -e 'print("hello")' - - - name: Cache R packages - uses: actions/cache@v3 - with: - path: ${{ env.R_LIBS_USER }} - key: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-2-${{ hashFiles('.github/depends.Rds') }} - restore-keys: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-2- - - - name: Install pak - run: | - install.packages("pak", repos = "https://r-lib.github.io/p/pak/devel/") - shell: Rscript {0} - - - name: Install dependencies - run: | - pak::pkg_install("jhnwllr/gbifmt") - pak::pkg_install(c("httr2","tibble","tidyr","rgbif","readr","purrr","dplyr", "ascii")) - shell: Rscript {0} - - - name: build table - run: Rscript build-table-script/R/table.R - - # - name: update machine tags - # run: Rscript build-table-script/R/mt.R - - - uses: stefanzweifel/git-auto-commit-action@v4 diff --git a/.history/.github/workflows/run_20240703144505.yml b/.history/.github/workflows/run_20240703144505.yml deleted file mode 100644 index 55d61cd..0000000 --- a/.history/.github/workflows/run_20240703144505.yml +++ /dev/null @@ -1,47 +0,0 @@ -on: - push: - schedule: - - cron: '0 0 1 * *' # every month - -jobs: - build: - runs-on: ubuntu-latest - name: run R script - - env: - GBIF_USER: ${{ secrets.GBIF_USER }} - GBIF_PWD: ${{ secrets.GBIF_PWD }} - - steps: - - uses: actions/checkout@v3 - - name: Setup R - uses: r-lib/actions/setup-r@v2 - with: - r-version: '4.2.2' - - run: Rscript -e 'print("hello")' - - - name: Cache R packages - uses: actions/cache@v3 - with: - path: ${{ env.R_LIBS_USER }} - key: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-2-${{ hashFiles('.github/depends.Rds') }} - restore-keys: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-2- - - - name: Install pak - run: | - install.packages("pak", repos = "https://r-lib.github.io/p/pak/devel/") - shell: Rscript {0} - - - name: Install dependencies - run: | - pak::pkg_install("jhnwllr/gbifmt") - pak::pkg_install(c("httr2","tibble","tidyr","rgbif","readr","purrr","dplyr", "ascii")) - shell: Rscript {0} - - - name: build table - run: Rscript build-table-script/R/table.R - - # - name: update machine tags - # run: Rscript build-table-script/R/mt.R - - - uses: stefanzweifel/git-auto-commit-action@v4 diff --git a/.history/260-private-sector-totals_20240704134811.csv b/.history/260-private-sector-totals_20240704134811.csv deleted file mode 100644 index 34f2048..0000000 --- a/.history/260-private-sector-totals_20240704134811.csv +++ /dev/null @@ -1 +0,0 @@ -595,9 606 840,10 745 diff --git a/.history/260-private-sector-totals_20240704134910.csv b/.history/260-private-sector-totals_20240704134910.csv deleted file mode 100644 index 34f2048..0000000 --- a/.history/260-private-sector-totals_20240704134910.csv +++ /dev/null @@ -1 +0,0 @@ -595,9 606 840,10 745 diff --git a/.history/260-private-sector-totals_20240704134934.csv b/.history/260-private-sector-totals_20240704134934.csv deleted file mode 100644 index 34f2048..0000000 --- a/.history/260-private-sector-totals_20240704134934.csv +++ /dev/null @@ -1 +0,0 @@ -595,9 606 840,10 745 diff --git a/.history/build-table-script/R/table_20240703144031.R b/.history/build-table-script/R/table_20240703144031.R deleted file mode 100644 index b4cf42e..0000000 --- a/.history/build-table-script/R/table_20240703144031.R +++ /dev/null @@ -1,74 +0,0 @@ - -library(dplyr) -library(purrr) -# setwd("C:/Users/ftw712/Desktop/doc-private-sector-data-publishing/") - -ss = readr::read_csv("build-table-script/data/source.tsv") %>% -mutate(pd = ifelse(grepl("publisher",link),"publisher","dataset")) %>% -mutate(key = gsub("https://www.gbif.org/publisher/","",link)) %>% -mutate(key = gsub("https://www.gbif.org/dataset/","",key)) %>% -glimpse() - -gbif_country = rgbif::enumeration_country() %>% select(Country=title,iso2) %>% glimpse() - -pp = ss %>% -dplyr::filter(pd == "publisher") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingOrganization)) %>% -mutate(`Occurrence records` = map_dbl(key,~ rgbif::occ_search(publishingOrg = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = map_dbl(key,~rgbif::dataset_search(publishingOrg= .x,limit=0)$meta$count)) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(publishingOrg = .x))) %>% -mutate(Company = paste0("https://www.gbif.org/publisher/",key,"[",name,"]")) %>% -mutate(iso2 = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -glimpse() - -dd = ss %>% -dplyr::filter(pd == "dataset") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::datasets(uuid=.x,limit=1)$data$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ -rgbif::occ_search(datasetKey = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = 1) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(datasetKey = .x))) %>% -mutate(Company = paste0("(https://www.gbif.org/dataset/",key,")[",name,"]")) %>% -mutate(p_key = map_chr(key,~ rgbif::datasets(uuid=.x,limit=1)$data$publishingOrganizationKey)) %>% -mutate(iso2 = map_chr(p_key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -select(-p_key) %>% -glimpse() - -# combine tables and clean up -tt = rbind(pp,dd) %>% -arrange(name) %>% -select(Company, `Activity sector`, Country, Datasets, `Occurrence records`, `Data citations`) %>% -glimpse() - -# save .adoc -save_file_table = "250-private-sector-table.adoc" - -# save and more cleanup -sink(file = save_file_table, type = "output") -tt %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -# totals table -save_file_totals = "260-private-sector-totals.adoc" - -sink(file = save_file_totals, type = "output") -tt %>% -summarise( -Datasets = sum(Datasets), -`Occurrence records` = sum(`Occurrence records`), -`Data citations` = sum(`Data citations`) -) %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - diff --git a/.history/build-table-script/R/table_20240703145945.R b/.history/build-table-script/R/table_20240703145945.R deleted file mode 100644 index 03bcf8c..0000000 --- a/.history/build-table-script/R/table_20240703145945.R +++ /dev/null @@ -1,74 +0,0 @@ - -library(dplyr) -library(purrr) -setwd("C:/Users/ftw712/Desktop/doc-private-sector-data-publishing/") - -ss = readr::read_csv("build-table-script/data/source.tsv") %>% -mutate(pd = ifelse(grepl("publisher",link),"publisher","dataset")) %>% -mutate(key = gsub("https://www.gbif.org/publisher/","",link)) %>% -mutate(key = gsub("https://www.gbif.org/dataset/","",key)) %>% -glimpse() - -gbif_country = rgbif::enumeration_country() %>% select(Country=title,iso2) %>% glimpse() - -pp = ss %>% -dplyr::filter(pd == "publisher") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingOrganization)) %>% -mutate(`Occurrence records` = map_dbl(key,~ rgbif::occ_search(publishingOrg = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = map_dbl(key,~rgbif::dataset_search(publishingOrg= .x,limit=0)$meta$count)) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(publishingOrg = .x))) %>% -mutate(Company = paste0("https://www.gbif.org/publisher/",key,"[",name,"]")) %>% -mutate(iso2 = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -glimpse() - -dd = ss %>% -dplyr::filter(pd == "dataset") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::datasets(uuid=.x,limit=1)$data$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ -rgbif::occ_search(datasetKey = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = 1) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(datasetKey = .x))) %>% -mutate(Company = paste0("(https://www.gbif.org/dataset/",key,")[",name,"]")) %>% -mutate(p_key = map_chr(key,~ rgbif::datasets(uuid=.x,limit=1)$data$publishingOrganizationKey)) %>% -mutate(iso2 = map_chr(p_key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -select(-p_key) %>% -glimpse() - -# combine tables and clean up -tt = rbind(pp,dd) %>% -arrange(name) %>% -select(Company, `Activity sector`, Country, Datasets, `Occurrence records`, `Data citations`) %>% -glimpse() - -# save .adoc -save_file_table = "250-private-sector-table.adoc" - -# save and more cleanup -sink(file = save_file_table, type = "output") -tt %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -# totals table -save_file_totals = "260-private-sector-totals.adoc" - -sink(file = save_file_totals, type = "output") -tt %>% -summarise( -Datasets = sum(Datasets), -`Occurrence records` = sum(`Occurrence records`), -`Data citations` = sum(`Data citations`) -) %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - diff --git a/.history/build-table-script/R/table_20240703150052.R b/.history/build-table-script/R/table_20240703150052.R deleted file mode 100644 index b4cf42e..0000000 --- a/.history/build-table-script/R/table_20240703150052.R +++ /dev/null @@ -1,74 +0,0 @@ - -library(dplyr) -library(purrr) -# setwd("C:/Users/ftw712/Desktop/doc-private-sector-data-publishing/") - -ss = readr::read_csv("build-table-script/data/source.tsv") %>% -mutate(pd = ifelse(grepl("publisher",link),"publisher","dataset")) %>% -mutate(key = gsub("https://www.gbif.org/publisher/","",link)) %>% -mutate(key = gsub("https://www.gbif.org/dataset/","",key)) %>% -glimpse() - -gbif_country = rgbif::enumeration_country() %>% select(Country=title,iso2) %>% glimpse() - -pp = ss %>% -dplyr::filter(pd == "publisher") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingOrganization)) %>% -mutate(`Occurrence records` = map_dbl(key,~ rgbif::occ_search(publishingOrg = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = map_dbl(key,~rgbif::dataset_search(publishingOrg= .x,limit=0)$meta$count)) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(publishingOrg = .x))) %>% -mutate(Company = paste0("https://www.gbif.org/publisher/",key,"[",name,"]")) %>% -mutate(iso2 = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -glimpse() - -dd = ss %>% -dplyr::filter(pd == "dataset") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::datasets(uuid=.x,limit=1)$data$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ -rgbif::occ_search(datasetKey = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = 1) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(datasetKey = .x))) %>% -mutate(Company = paste0("(https://www.gbif.org/dataset/",key,")[",name,"]")) %>% -mutate(p_key = map_chr(key,~ rgbif::datasets(uuid=.x,limit=1)$data$publishingOrganizationKey)) %>% -mutate(iso2 = map_chr(p_key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -select(-p_key) %>% -glimpse() - -# combine tables and clean up -tt = rbind(pp,dd) %>% -arrange(name) %>% -select(Company, `Activity sector`, Country, Datasets, `Occurrence records`, `Data citations`) %>% -glimpse() - -# save .adoc -save_file_table = "250-private-sector-table.adoc" - -# save and more cleanup -sink(file = save_file_table, type = "output") -tt %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -# totals table -save_file_totals = "260-private-sector-totals.adoc" - -sink(file = save_file_totals, type = "output") -tt %>% -summarise( -Datasets = sum(Datasets), -`Occurrence records` = sum(`Occurrence records`), -`Data citations` = sum(`Data citations`) -) %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - diff --git a/.history/build-table-script/R/table_20240703150534.R b/.history/build-table-script/R/table_20240703150534.R deleted file mode 100644 index 03aa9d8..0000000 --- a/.history/build-table-script/R/table_20240703150534.R +++ /dev/null @@ -1,77 +0,0 @@ - -library(dplyr) -library(purrr) -# setwd("C:/Users/ftw712/Desktop/doc-private-sector-data-publishing/") - -if(FALSE) { - -ss = readr::read_csv("build-table-script/data/source.tsv") %>% -mutate(pd = ifelse(grepl("publisher",link),"publisher","dataset")) %>% -mutate(key = gsub("https://www.gbif.org/publisher/","",link)) %>% -mutate(key = gsub("https://www.gbif.org/dataset/","",key)) %>% -glimpse() - -gbif_country = rgbif::enumeration_country() %>% select(Country=title,iso2) %>% glimpse() - -pp = ss %>% -dplyr::filter(pd == "publisher") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingOrganization)) %>% -mutate(`Occurrence records` = map_dbl(key,~ rgbif::occ_search(publishingOrg = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = map_dbl(key,~rgbif::dataset_search(publishingOrg= .x,limit=0)$meta$count)) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(publishingOrg = .x))) %>% -mutate(Company = paste0("https://www.gbif.org/publisher/",key,"[",name,"]")) %>% -mutate(iso2 = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -glimpse() - -dd = ss %>% -dplyr::filter(pd == "dataset") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::datasets(uuid=.x,limit=1)$data$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ -rgbif::occ_search(datasetKey = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = 1) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(datasetKey = .x))) %>% -mutate(Company = paste0("(https://www.gbif.org/dataset/",key,")[",name,"]")) %>% -mutate(p_key = map_chr(key,~ rgbif::datasets(uuid=.x,limit=1)$data$publishingOrganizationKey)) %>% -mutate(iso2 = map_chr(p_key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -select(-p_key) %>% -glimpse() - -# combine tables and clean up -tt = rbind(pp,dd) %>% -arrange(name) %>% -select(Company, `Activity sector`, Country, Datasets, `Occurrence records`, `Data citations`) %>% -glimpse() - -# save .adoc -save_file_table = "250-private-sector-table.adoc" - -# save and more cleanup -sink(file = save_file_table, type = "output") -tt %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -# totals table -save_file_totals = "260-private-sector-totals.adoc" - -sink(file = save_file_totals, type = "output") -tt %>% -summarise( -Datasets = sum(Datasets), -`Occurrence records` = sum(`Occurrence records`), -`Data citations` = sum(`Data citations`) -) %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -} \ No newline at end of file diff --git a/.history/build-table-script/R/table_20240703150544.R b/.history/build-table-script/R/table_20240703150544.R deleted file mode 100644 index 778af6b..0000000 --- a/.history/build-table-script/R/table_20240703150544.R +++ /dev/null @@ -1,80 +0,0 @@ - -library(dplyr) -library(purrr) -# setwd("C:/Users/ftw712/Desktop/doc-private-sector-data-publishing/") - - - - -if(FALSE) { - -ss = readr::read_csv("build-table-script/data/source.tsv") %>% -mutate(pd = ifelse(grepl("publisher",link),"publisher","dataset")) %>% -mutate(key = gsub("https://www.gbif.org/publisher/","",link)) %>% -mutate(key = gsub("https://www.gbif.org/dataset/","",key)) %>% -glimpse() - -gbif_country = rgbif::enumeration_country() %>% select(Country=title,iso2) %>% glimpse() - -pp = ss %>% -dplyr::filter(pd == "publisher") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingOrganization)) %>% -mutate(`Occurrence records` = map_dbl(key,~ rgbif::occ_search(publishingOrg = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = map_dbl(key,~rgbif::dataset_search(publishingOrg= .x,limit=0)$meta$count)) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(publishingOrg = .x))) %>% -mutate(Company = paste0("https://www.gbif.org/publisher/",key,"[",name,"]")) %>% -mutate(iso2 = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -glimpse() - -dd = ss %>% -dplyr::filter(pd == "dataset") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::datasets(uuid=.x,limit=1)$data$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ -rgbif::occ_search(datasetKey = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = 1) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(datasetKey = .x))) %>% -mutate(Company = paste0("(https://www.gbif.org/dataset/",key,")[",name,"]")) %>% -mutate(p_key = map_chr(key,~ rgbif::datasets(uuid=.x,limit=1)$data$publishingOrganizationKey)) %>% -mutate(iso2 = map_chr(p_key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -select(-p_key) %>% -glimpse() - -# combine tables and clean up -tt = rbind(pp,dd) %>% -arrange(name) %>% -select(Company, `Activity sector`, Country, Datasets, `Occurrence records`, `Data citations`) %>% -glimpse() - -# save .adoc -save_file_table = "250-private-sector-table.adoc" - -# save and more cleanup -sink(file = save_file_table, type = "output") -tt %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -# totals table -save_file_totals = "260-private-sector-totals.adoc" - -sink(file = save_file_totals, type = "output") -tt %>% -summarise( -Datasets = sum(Datasets), -`Occurrence records` = sum(`Occurrence records`), -`Data citations` = sum(`Data citations`) -) %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -} \ No newline at end of file diff --git a/.history/build-table-script/R/table_20240703150553.R b/.history/build-table-script/R/table_20240703150553.R deleted file mode 100644 index 778af6b..0000000 --- a/.history/build-table-script/R/table_20240703150553.R +++ /dev/null @@ -1,80 +0,0 @@ - -library(dplyr) -library(purrr) -# setwd("C:/Users/ftw712/Desktop/doc-private-sector-data-publishing/") - - - - -if(FALSE) { - -ss = readr::read_csv("build-table-script/data/source.tsv") %>% -mutate(pd = ifelse(grepl("publisher",link),"publisher","dataset")) %>% -mutate(key = gsub("https://www.gbif.org/publisher/","",link)) %>% -mutate(key = gsub("https://www.gbif.org/dataset/","",key)) %>% -glimpse() - -gbif_country = rgbif::enumeration_country() %>% select(Country=title,iso2) %>% glimpse() - -pp = ss %>% -dplyr::filter(pd == "publisher") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingOrganization)) %>% -mutate(`Occurrence records` = map_dbl(key,~ rgbif::occ_search(publishingOrg = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = map_dbl(key,~rgbif::dataset_search(publishingOrg= .x,limit=0)$meta$count)) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(publishingOrg = .x))) %>% -mutate(Company = paste0("https://www.gbif.org/publisher/",key,"[",name,"]")) %>% -mutate(iso2 = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -glimpse() - -dd = ss %>% -dplyr::filter(pd == "dataset") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::datasets(uuid=.x,limit=1)$data$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ -rgbif::occ_search(datasetKey = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = 1) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(datasetKey = .x))) %>% -mutate(Company = paste0("(https://www.gbif.org/dataset/",key,")[",name,"]")) %>% -mutate(p_key = map_chr(key,~ rgbif::datasets(uuid=.x,limit=1)$data$publishingOrganizationKey)) %>% -mutate(iso2 = map_chr(p_key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -select(-p_key) %>% -glimpse() - -# combine tables and clean up -tt = rbind(pp,dd) %>% -arrange(name) %>% -select(Company, `Activity sector`, Country, Datasets, `Occurrence records`, `Data citations`) %>% -glimpse() - -# save .adoc -save_file_table = "250-private-sector-table.adoc" - -# save and more cleanup -sink(file = save_file_table, type = "output") -tt %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -# totals table -save_file_totals = "260-private-sector-totals.adoc" - -sink(file = save_file_totals, type = "output") -tt %>% -summarise( -Datasets = sum(Datasets), -`Occurrence records` = sum(`Occurrence records`), -`Data citations` = sum(`Data citations`) -) %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -} \ No newline at end of file diff --git a/.history/build-table-script/R/table_20240703150627.R b/.history/build-table-script/R/table_20240703150627.R deleted file mode 100644 index 1686128..0000000 --- a/.history/build-table-script/R/table_20240703150627.R +++ /dev/null @@ -1,80 +0,0 @@ - -library(dplyr) -library(purrr) -# setwd("C:/Users/ftw712/Desktop/doc-private-sector-data-publishing/") - -print("dog") - - -if(FALSE) { - -ss = readr::read_csv("build-table-script/data/source.tsv") %>% -mutate(pd = ifelse(grepl("publisher",link),"publisher","dataset")) %>% -mutate(key = gsub("https://www.gbif.org/publisher/","",link)) %>% -mutate(key = gsub("https://www.gbif.org/dataset/","",key)) %>% -glimpse() - -gbif_country = rgbif::enumeration_country() %>% select(Country=title,iso2) %>% glimpse() - -pp = ss %>% -dplyr::filter(pd == "publisher") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingOrganization)) %>% -mutate(`Occurrence records` = map_dbl(key,~ rgbif::occ_search(publishingOrg = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = map_dbl(key,~rgbif::dataset_search(publishingOrg= .x,limit=0)$meta$count)) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(publishingOrg = .x))) %>% -mutate(Company = paste0("https://www.gbif.org/publisher/",key,"[",name,"]")) %>% -mutate(iso2 = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -glimpse() - -dd = ss %>% -dplyr::filter(pd == "dataset") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::datasets(uuid=.x,limit=1)$data$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ -rgbif::occ_search(datasetKey = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = 1) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(datasetKey = .x))) %>% -mutate(Company = paste0("(https://www.gbif.org/dataset/",key,")[",name,"]")) %>% -mutate(p_key = map_chr(key,~ rgbif::datasets(uuid=.x,limit=1)$data$publishingOrganizationKey)) %>% -mutate(iso2 = map_chr(p_key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -select(-p_key) %>% -glimpse() - -# combine tables and clean up -tt = rbind(pp,dd) %>% -arrange(name) %>% -select(Company, `Activity sector`, Country, Datasets, `Occurrence records`, `Data citations`) %>% -glimpse() - -# save .adoc -save_file_table = "250-private-sector-table.adoc" - -# save and more cleanup -sink(file = save_file_table, type = "output") -tt %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -# totals table -save_file_totals = "260-private-sector-totals.adoc" - -sink(file = save_file_totals, type = "output") -tt %>% -summarise( -Datasets = sum(Datasets), -`Occurrence records` = sum(`Occurrence records`), -`Data citations` = sum(`Data citations`) -) %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -} \ No newline at end of file diff --git a/.history/build-table-script/R/table_20240703150636.R b/.history/build-table-script/R/table_20240703150636.R deleted file mode 100644 index 9353333..0000000 --- a/.history/build-table-script/R/table_20240703150636.R +++ /dev/null @@ -1,78 +0,0 @@ - -library(dplyr) -library(purrr) -# setwd("C:/Users/ftw712/Desktop/doc-private-sector-data-publishing/") - - -if(FALSE) { - -ss = readr::read_csv("build-table-script/data/source.tsv") %>% -mutate(pd = ifelse(grepl("publisher",link),"publisher","dataset")) %>% -mutate(key = gsub("https://www.gbif.org/publisher/","",link)) %>% -mutate(key = gsub("https://www.gbif.org/dataset/","",key)) %>% -glimpse() - -gbif_country = rgbif::enumeration_country() %>% select(Country=title,iso2) %>% glimpse() - -pp = ss %>% -dplyr::filter(pd == "publisher") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingOrganization)) %>% -mutate(`Occurrence records` = map_dbl(key,~ rgbif::occ_search(publishingOrg = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = map_dbl(key,~rgbif::dataset_search(publishingOrg= .x,limit=0)$meta$count)) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(publishingOrg = .x))) %>% -mutate(Company = paste0("https://www.gbif.org/publisher/",key,"[",name,"]")) %>% -mutate(iso2 = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -glimpse() - -dd = ss %>% -dplyr::filter(pd == "dataset") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::datasets(uuid=.x,limit=1)$data$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ -rgbif::occ_search(datasetKey = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = 1) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(datasetKey = .x))) %>% -mutate(Company = paste0("(https://www.gbif.org/dataset/",key,")[",name,"]")) %>% -mutate(p_key = map_chr(key,~ rgbif::datasets(uuid=.x,limit=1)$data$publishingOrganizationKey)) %>% -mutate(iso2 = map_chr(p_key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -select(-p_key) %>% -glimpse() - -# combine tables and clean up -tt = rbind(pp,dd) %>% -arrange(name) %>% -select(Company, `Activity sector`, Country, Datasets, `Occurrence records`, `Data citations`) %>% -glimpse() - -# save .adoc -save_file_table = "250-private-sector-table.adoc" - -# save and more cleanup -sink(file = save_file_table, type = "output") -tt %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -# totals table -save_file_totals = "260-private-sector-totals.adoc" - -sink(file = save_file_totals, type = "output") -tt %>% -summarise( -Datasets = sum(Datasets), -`Occurrence records` = sum(`Occurrence records`), -`Data citations` = sum(`Data citations`) -) %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -} \ No newline at end of file diff --git a/.history/build-table-script/R/table_20240703150902.R b/.history/build-table-script/R/table_20240703150902.R deleted file mode 100644 index bf24934..0000000 --- a/.history/build-table-script/R/table_20240703150902.R +++ /dev/null @@ -1,80 +0,0 @@ - -library(dplyr) -library(purrr) -# setwd("C:/Users/ftw712/Desktop/doc-private-sector-data-publishing/") - -get_mt("privateSector.gbif.org",type="dataset",limit=500) - - -if(FALSE) { - -ss = readr::read_csv("build-table-script/data/source.tsv") %>% -mutate(pd = ifelse(grepl("publisher",link),"publisher","dataset")) %>% -mutate(key = gsub("https://www.gbif.org/publisher/","",link)) %>% -mutate(key = gsub("https://www.gbif.org/dataset/","",key)) %>% -glimpse() - -gbif_country = rgbif::enumeration_country() %>% select(Country=title,iso2) %>% glimpse() - -pp = ss %>% -dplyr::filter(pd == "publisher") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingOrganization)) %>% -mutate(`Occurrence records` = map_dbl(key,~ rgbif::occ_search(publishingOrg = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = map_dbl(key,~rgbif::dataset_search(publishingOrg= .x,limit=0)$meta$count)) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(publishingOrg = .x))) %>% -mutate(Company = paste0("https://www.gbif.org/publisher/",key,"[",name,"]")) %>% -mutate(iso2 = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -glimpse() - -dd = ss %>% -dplyr::filter(pd == "dataset") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::datasets(uuid=.x,limit=1)$data$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ -rgbif::occ_search(datasetKey = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = 1) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(datasetKey = .x))) %>% -mutate(Company = paste0("(https://www.gbif.org/dataset/",key,")[",name,"]")) %>% -mutate(p_key = map_chr(key,~ rgbif::datasets(uuid=.x,limit=1)$data$publishingOrganizationKey)) %>% -mutate(iso2 = map_chr(p_key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -select(-p_key) %>% -glimpse() - -# combine tables and clean up -tt = rbind(pp,dd) %>% -arrange(name) %>% -select(Company, `Activity sector`, Country, Datasets, `Occurrence records`, `Data citations`) %>% -glimpse() - -# save .adoc -save_file_table = "250-private-sector-table.adoc" - -# save and more cleanup -sink(file = save_file_table, type = "output") -tt %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -# totals table -save_file_totals = "260-private-sector-totals.adoc" - -sink(file = save_file_totals, type = "output") -tt %>% -summarise( -Datasets = sum(Datasets), -`Occurrence records` = sum(`Occurrence records`), -`Data citations` = sum(`Data citations`) -) %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -} \ No newline at end of file diff --git a/.history/build-table-script/R/table_20240703150914.R b/.history/build-table-script/R/table_20240703150914.R deleted file mode 100644 index bbeed35..0000000 --- a/.history/build-table-script/R/table_20240703150914.R +++ /dev/null @@ -1,81 +0,0 @@ - -library(dplyr) -library(purrr) -libary(gbifmt) # my library -# setwd("C:/Users/ftw712/Desktop/doc-private-sector-data-publishing/") - -get_mt("privateSector.gbif.org",type="dataset",limit=500) - - -if(FALSE) { - -ss = readr::read_csv("build-table-script/data/source.tsv") %>% -mutate(pd = ifelse(grepl("publisher",link),"publisher","dataset")) %>% -mutate(key = gsub("https://www.gbif.org/publisher/","",link)) %>% -mutate(key = gsub("https://www.gbif.org/dataset/","",key)) %>% -glimpse() - -gbif_country = rgbif::enumeration_country() %>% select(Country=title,iso2) %>% glimpse() - -pp = ss %>% -dplyr::filter(pd == "publisher") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingOrganization)) %>% -mutate(`Occurrence records` = map_dbl(key,~ rgbif::occ_search(publishingOrg = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = map_dbl(key,~rgbif::dataset_search(publishingOrg= .x,limit=0)$meta$count)) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(publishingOrg = .x))) %>% -mutate(Company = paste0("https://www.gbif.org/publisher/",key,"[",name,"]")) %>% -mutate(iso2 = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -glimpse() - -dd = ss %>% -dplyr::filter(pd == "dataset") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::datasets(uuid=.x,limit=1)$data$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ -rgbif::occ_search(datasetKey = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = 1) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(datasetKey = .x))) %>% -mutate(Company = paste0("(https://www.gbif.org/dataset/",key,")[",name,"]")) %>% -mutate(p_key = map_chr(key,~ rgbif::datasets(uuid=.x,limit=1)$data$publishingOrganizationKey)) %>% -mutate(iso2 = map_chr(p_key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -select(-p_key) %>% -glimpse() - -# combine tables and clean up -tt = rbind(pp,dd) %>% -arrange(name) %>% -select(Company, `Activity sector`, Country, Datasets, `Occurrence records`, `Data citations`) %>% -glimpse() - -# save .adoc -save_file_table = "250-private-sector-table.adoc" - -# save and more cleanup -sink(file = save_file_table, type = "output") -tt %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -# totals table -save_file_totals = "260-private-sector-totals.adoc" - -sink(file = save_file_totals, type = "output") -tt %>% -summarise( -Datasets = sum(Datasets), -`Occurrence records` = sum(`Occurrence records`), -`Data citations` = sum(`Data citations`) -) %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -} \ No newline at end of file diff --git a/.history/build-table-script/R/table_20240703150928.R b/.history/build-table-script/R/table_20240703150928.R deleted file mode 100644 index 5328b05..0000000 --- a/.history/build-table-script/R/table_20240703150928.R +++ /dev/null @@ -1,81 +0,0 @@ - -library(dplyr) -library(purrr) -library(gbifmt) # my library -# setwd("C:/Users/ftw712/Desktop/doc-private-sector-data-publishing/") - -get_mt("privateSector.gbif.org",type="dataset",limit=500) - - -if(FALSE) { - -ss = readr::read_csv("build-table-script/data/source.tsv") %>% -mutate(pd = ifelse(grepl("publisher",link),"publisher","dataset")) %>% -mutate(key = gsub("https://www.gbif.org/publisher/","",link)) %>% -mutate(key = gsub("https://www.gbif.org/dataset/","",key)) %>% -glimpse() - -gbif_country = rgbif::enumeration_country() %>% select(Country=title,iso2) %>% glimpse() - -pp = ss %>% -dplyr::filter(pd == "publisher") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingOrganization)) %>% -mutate(`Occurrence records` = map_dbl(key,~ rgbif::occ_search(publishingOrg = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = map_dbl(key,~rgbif::dataset_search(publishingOrg= .x,limit=0)$meta$count)) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(publishingOrg = .x))) %>% -mutate(Company = paste0("https://www.gbif.org/publisher/",key,"[",name,"]")) %>% -mutate(iso2 = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -glimpse() - -dd = ss %>% -dplyr::filter(pd == "dataset") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::datasets(uuid=.x,limit=1)$data$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ -rgbif::occ_search(datasetKey = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = 1) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(datasetKey = .x))) %>% -mutate(Company = paste0("(https://www.gbif.org/dataset/",key,")[",name,"]")) %>% -mutate(p_key = map_chr(key,~ rgbif::datasets(uuid=.x,limit=1)$data$publishingOrganizationKey)) %>% -mutate(iso2 = map_chr(p_key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -select(-p_key) %>% -glimpse() - -# combine tables and clean up -tt = rbind(pp,dd) %>% -arrange(name) %>% -select(Company, `Activity sector`, Country, Datasets, `Occurrence records`, `Data citations`) %>% -glimpse() - -# save .adoc -save_file_table = "250-private-sector-table.adoc" - -# save and more cleanup -sink(file = save_file_table, type = "output") -tt %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -# totals table -save_file_totals = "260-private-sector-totals.adoc" - -sink(file = save_file_totals, type = "output") -tt %>% -summarise( -Datasets = sum(Datasets), -`Occurrence records` = sum(`Occurrence records`), -`Data citations` = sum(`Data citations`) -) %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -} \ No newline at end of file diff --git a/.history/build-table-script/R/table_20240703150958.R b/.history/build-table-script/R/table_20240703150958.R deleted file mode 100644 index 0afbe66..0000000 --- a/.history/build-table-script/R/table_20240703150958.R +++ /dev/null @@ -1,79 +0,0 @@ - -library(dplyr) -library(purrr) -library(gbifmt) # my library -# setwd("C:/Users/ftw712/Desktop/doc-private-sector-data-publishing/") - -get_mt("privateSector.gbif.org",type="dataset",limit=500) - -ss = readr::read_csv("build-table-script/data/source.tsv") %>% -mutate(pd = ifelse(grepl("publisher",link),"publisher","dataset")) %>% -mutate(key = gsub("https://www.gbif.org/publisher/","",link)) %>% -mutate(key = gsub("https://www.gbif.org/dataset/","",key)) %>% -glimpse() - -if(FALSE) { -gbif_country = rgbif::enumeration_country() %>% select(Country=title,iso2) %>% glimpse() - -pp = ss %>% -dplyr::filter(pd == "publisher") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingOrganization)) %>% -mutate(`Occurrence records` = map_dbl(key,~ rgbif::occ_search(publishingOrg = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = map_dbl(key,~rgbif::dataset_search(publishingOrg= .x,limit=0)$meta$count)) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(publishingOrg = .x))) %>% -mutate(Company = paste0("https://www.gbif.org/publisher/",key,"[",name,"]")) %>% -mutate(iso2 = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -glimpse() - -dd = ss %>% -dplyr::filter(pd == "dataset") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::datasets(uuid=.x,limit=1)$data$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ -rgbif::occ_search(datasetKey = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = 1) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(datasetKey = .x))) %>% -mutate(Company = paste0("(https://www.gbif.org/dataset/",key,")[",name,"]")) %>% -mutate(p_key = map_chr(key,~ rgbif::datasets(uuid=.x,limit=1)$data$publishingOrganizationKey)) %>% -mutate(iso2 = map_chr(p_key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -select(-p_key) %>% -glimpse() - -# combine tables and clean up -tt = rbind(pp,dd) %>% -arrange(name) %>% -select(Company, `Activity sector`, Country, Datasets, `Occurrence records`, `Data citations`) %>% -glimpse() - -# save .adoc -save_file_table = "250-private-sector-table.adoc" - -# save and more cleanup -sink(file = save_file_table, type = "output") -tt %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -# totals table -save_file_totals = "260-private-sector-totals.adoc" - -sink(file = save_file_totals, type = "output") -tt %>% -summarise( -Datasets = sum(Datasets), -`Occurrence records` = sum(`Occurrence records`), -`Data citations` = sum(`Data citations`) -) %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -} \ No newline at end of file diff --git a/.history/build-table-script/R/table_20240703151048.R b/.history/build-table-script/R/table_20240703151048.R deleted file mode 100644 index f663345..0000000 --- a/.history/build-table-script/R/table_20240703151048.R +++ /dev/null @@ -1,80 +0,0 @@ - -library(dplyr) -library(purrr) -library(gbifmt) # my library -# setwd("C:/Users/ftw712/Desktop/doc-private-sector-data-publishing/") - -get_mt("privateSector.gbif.org",type="dataset",limit=500) %>% - - -ss = readr::read_csv("build-table-script/data/source.tsv") %>% -mutate(pd = ifelse(grepl("publisher",link),"publisher","dataset")) %>% -mutate(key = gsub("https://www.gbif.org/publisher/","",link)) %>% -mutate(key = gsub("https://www.gbif.org/dataset/","",key)) %>% -glimpse() - -if(FALSE) { -gbif_country = rgbif::enumeration_country() %>% select(Country=title,iso2) %>% glimpse() - -pp = ss %>% -dplyr::filter(pd == "publisher") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingOrganization)) %>% -mutate(`Occurrence records` = map_dbl(key,~ rgbif::occ_search(publishingOrg = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = map_dbl(key,~rgbif::dataset_search(publishingOrg= .x,limit=0)$meta$count)) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(publishingOrg = .x))) %>% -mutate(Company = paste0("https://www.gbif.org/publisher/",key,"[",name,"]")) %>% -mutate(iso2 = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -glimpse() - -dd = ss %>% -dplyr::filter(pd == "dataset") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::datasets(uuid=.x,limit=1)$data$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ -rgbif::occ_search(datasetKey = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = 1) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(datasetKey = .x))) %>% -mutate(Company = paste0("(https://www.gbif.org/dataset/",key,")[",name,"]")) %>% -mutate(p_key = map_chr(key,~ rgbif::datasets(uuid=.x,limit=1)$data$publishingOrganizationKey)) %>% -mutate(iso2 = map_chr(p_key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -select(-p_key) %>% -glimpse() - -# combine tables and clean up -tt = rbind(pp,dd) %>% -arrange(name) %>% -select(Company, `Activity sector`, Country, Datasets, `Occurrence records`, `Data citations`) %>% -glimpse() - -# save .adoc -save_file_table = "250-private-sector-table.adoc" - -# save and more cleanup -sink(file = save_file_table, type = "output") -tt %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -# totals table -save_file_totals = "260-private-sector-totals.adoc" - -sink(file = save_file_totals, type = "output") -tt %>% -summarise( -Datasets = sum(Datasets), -`Occurrence records` = sum(`Occurrence records`), -`Data citations` = sum(`Data citations`) -) %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -} \ No newline at end of file diff --git a/.history/build-table-script/R/table_20240703151213.R b/.history/build-table-script/R/table_20240703151213.R deleted file mode 100644 index 7813df9..0000000 --- a/.history/build-table-script/R/table_20240703151213.R +++ /dev/null @@ -1,83 +0,0 @@ - -library(dplyr) -library(purrr) -library(gbifmt) # my library -# setwd("C:/Users/ftw712/Desktop/doc-private-sector-data-publishing/") - -get_mt("privateSector.gbif.org",type="dataset",limit=500) %>% -get_mt("privateSector.gbif.org",type="publisher",limit=500) %>% - -mutate() - - -ss = readr::read_csv("build-table-script/data/source.tsv") %>% -mutate(pd = ifelse(grepl("publisher",link),"publisher","dataset")) %>% -mutate(key = gsub("https://www.gbif.org/publisher/","",link)) %>% -mutate(key = gsub("https://www.gbif.org/dataset/","",key)) %>% -glimpse() - -if(FALSE) { -gbif_country = rgbif::enumeration_country() %>% select(Country=title,iso2) %>% glimpse() - -pp = ss %>% -dplyr::filter(pd == "publisher") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingOrganization)) %>% -mutate(`Occurrence records` = map_dbl(key,~ rgbif::occ_search(publishingOrg = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = map_dbl(key,~rgbif::dataset_search(publishingOrg= .x,limit=0)$meta$count)) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(publishingOrg = .x))) %>% -mutate(Company = paste0("https://www.gbif.org/publisher/",key,"[",name,"]")) %>% -mutate(iso2 = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -glimpse() - -dd = ss %>% -dplyr::filter(pd == "dataset") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::datasets(uuid=.x,limit=1)$data$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ -rgbif::occ_search(datasetKey = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = 1) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(datasetKey = .x))) %>% -mutate(Company = paste0("(https://www.gbif.org/dataset/",key,")[",name,"]")) %>% -mutate(p_key = map_chr(key,~ rgbif::datasets(uuid=.x,limit=1)$data$publishingOrganizationKey)) %>% -mutate(iso2 = map_chr(p_key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -select(-p_key) %>% -glimpse() - -# combine tables and clean up -tt = rbind(pp,dd) %>% -arrange(name) %>% -select(Company, `Activity sector`, Country, Datasets, `Occurrence records`, `Data citations`) %>% -glimpse() - -# save .adoc -save_file_table = "250-private-sector-table.adoc" - -# save and more cleanup -sink(file = save_file_table, type = "output") -tt %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -# totals table -save_file_totals = "260-private-sector-totals.adoc" - -sink(file = save_file_totals, type = "output") -tt %>% -summarise( -Datasets = sum(Datasets), -`Occurrence records` = sum(`Occurrence records`), -`Data citations` = sum(`Data citations`) -) %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -} \ No newline at end of file diff --git a/.history/build-table-script/R/table_20240703151220.R b/.history/build-table-script/R/table_20240703151220.R deleted file mode 100644 index 33a9f81..0000000 --- a/.history/build-table-script/R/table_20240703151220.R +++ /dev/null @@ -1,83 +0,0 @@ - -library(dplyr) -library(purrr) -library(gbifmt) # my library -# setwd("C:/Users/ftw712/Desktop/doc-private-sector-data-publishing/") - -get_mt("privateSector.gbif.org",type="dataset",limit=500) -get_mt("privateSector.gbif.org",type="publisher",limit=500) - -# mutate() - - -ss = readr::read_csv("build-table-script/data/source.tsv") %>% -mutate(pd = ifelse(grepl("publisher",link),"publisher","dataset")) %>% -mutate(key = gsub("https://www.gbif.org/publisher/","",link)) %>% -mutate(key = gsub("https://www.gbif.org/dataset/","",key)) %>% -glimpse() - -if(FALSE) { -gbif_country = rgbif::enumeration_country() %>% select(Country=title,iso2) %>% glimpse() - -pp = ss %>% -dplyr::filter(pd == "publisher") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingOrganization)) %>% -mutate(`Occurrence records` = map_dbl(key,~ rgbif::occ_search(publishingOrg = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = map_dbl(key,~rgbif::dataset_search(publishingOrg= .x,limit=0)$meta$count)) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(publishingOrg = .x))) %>% -mutate(Company = paste0("https://www.gbif.org/publisher/",key,"[",name,"]")) %>% -mutate(iso2 = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -glimpse() - -dd = ss %>% -dplyr::filter(pd == "dataset") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::datasets(uuid=.x,limit=1)$data$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ -rgbif::occ_search(datasetKey = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = 1) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(datasetKey = .x))) %>% -mutate(Company = paste0("(https://www.gbif.org/dataset/",key,")[",name,"]")) %>% -mutate(p_key = map_chr(key,~ rgbif::datasets(uuid=.x,limit=1)$data$publishingOrganizationKey)) %>% -mutate(iso2 = map_chr(p_key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -select(-p_key) %>% -glimpse() - -# combine tables and clean up -tt = rbind(pp,dd) %>% -arrange(name) %>% -select(Company, `Activity sector`, Country, Datasets, `Occurrence records`, `Data citations`) %>% -glimpse() - -# save .adoc -save_file_table = "250-private-sector-table.adoc" - -# save and more cleanup -sink(file = save_file_table, type = "output") -tt %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -# totals table -save_file_totals = "260-private-sector-totals.adoc" - -sink(file = save_file_totals, type = "output") -tt %>% -summarise( -Datasets = sum(Datasets), -`Occurrence records` = sum(`Occurrence records`), -`Data citations` = sum(`Data citations`) -) %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -} \ No newline at end of file diff --git a/.history/build-table-script/R/table_20240703151245.R b/.history/build-table-script/R/table_20240703151245.R deleted file mode 100644 index 5f8e402..0000000 --- a/.history/build-table-script/R/table_20240703151245.R +++ /dev/null @@ -1,83 +0,0 @@ - -library(dplyr) -library(purrr) -library(gbifmt) # my library -# setwd("C:/Users/ftw712/Desktop/doc-private-sector-data-publishing/") - -get_mt("privateSector.gbif.org",type="dataset",limit=500) -get_mt("privateSector.gbif.org",type="organization",limit=500) - -# mutate() - - -ss = readr::read_csv("build-table-script/data/source.tsv") %>% -mutate(pd = ifelse(grepl("publisher",link),"publisher","dataset")) %>% -mutate(key = gsub("https://www.gbif.org/publisher/","",link)) %>% -mutate(key = gsub("https://www.gbif.org/dataset/","",key)) %>% -glimpse() - -if(FALSE) { -gbif_country = rgbif::enumeration_country() %>% select(Country=title,iso2) %>% glimpse() - -pp = ss %>% -dplyr::filter(pd == "publisher") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingOrganization)) %>% -mutate(`Occurrence records` = map_dbl(key,~ rgbif::occ_search(publishingOrg = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = map_dbl(key,~rgbif::dataset_search(publishingOrg= .x,limit=0)$meta$count)) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(publishingOrg = .x))) %>% -mutate(Company = paste0("https://www.gbif.org/publisher/",key,"[",name,"]")) %>% -mutate(iso2 = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -glimpse() - -dd = ss %>% -dplyr::filter(pd == "dataset") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::datasets(uuid=.x,limit=1)$data$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ -rgbif::occ_search(datasetKey = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = 1) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(datasetKey = .x))) %>% -mutate(Company = paste0("(https://www.gbif.org/dataset/",key,")[",name,"]")) %>% -mutate(p_key = map_chr(key,~ rgbif::datasets(uuid=.x,limit=1)$data$publishingOrganizationKey)) %>% -mutate(iso2 = map_chr(p_key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -select(-p_key) %>% -glimpse() - -# combine tables and clean up -tt = rbind(pp,dd) %>% -arrange(name) %>% -select(Company, `Activity sector`, Country, Datasets, `Occurrence records`, `Data citations`) %>% -glimpse() - -# save .adoc -save_file_table = "250-private-sector-table.adoc" - -# save and more cleanup -sink(file = save_file_table, type = "output") -tt %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -# totals table -save_file_totals = "260-private-sector-totals.adoc" - -sink(file = save_file_totals, type = "output") -tt %>% -summarise( -Datasets = sum(Datasets), -`Occurrence records` = sum(`Occurrence records`), -`Data citations` = sum(`Data citations`) -) %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -} \ No newline at end of file diff --git a/.history/build-table-script/R/table_20240703151332.R b/.history/build-table-script/R/table_20240703151332.R deleted file mode 100644 index 2737883..0000000 --- a/.history/build-table-script/R/table_20240703151332.R +++ /dev/null @@ -1,84 +0,0 @@ - -library(dplyr) -library(purrr) -library(gbifmt) # my library -# setwd("C:/Users/ftw712/Desktop/doc-private-sector-data-publishing/") - -list("dataset","organization") %>% -map(get_mt("privateSector.gbif.org",type="dataset",limit=500)) - - -# mutate() - - -ss = readr::read_csv("build-table-script/data/source.tsv") %>% -mutate(pd = ifelse(grepl("publisher",link),"publisher","dataset")) %>% -mutate(key = gsub("https://www.gbif.org/publisher/","",link)) %>% -mutate(key = gsub("https://www.gbif.org/dataset/","",key)) %>% -glimpse() - -if(FALSE) { -gbif_country = rgbif::enumeration_country() %>% select(Country=title,iso2) %>% glimpse() - -pp = ss %>% -dplyr::filter(pd == "publisher") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingOrganization)) %>% -mutate(`Occurrence records` = map_dbl(key,~ rgbif::occ_search(publishingOrg = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = map_dbl(key,~rgbif::dataset_search(publishingOrg= .x,limit=0)$meta$count)) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(publishingOrg = .x))) %>% -mutate(Company = paste0("https://www.gbif.org/publisher/",key,"[",name,"]")) %>% -mutate(iso2 = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -glimpse() - -dd = ss %>% -dplyr::filter(pd == "dataset") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::datasets(uuid=.x,limit=1)$data$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ -rgbif::occ_search(datasetKey = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = 1) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(datasetKey = .x))) %>% -mutate(Company = paste0("(https://www.gbif.org/dataset/",key,")[",name,"]")) %>% -mutate(p_key = map_chr(key,~ rgbif::datasets(uuid=.x,limit=1)$data$publishingOrganizationKey)) %>% -mutate(iso2 = map_chr(p_key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -select(-p_key) %>% -glimpse() - -# combine tables and clean up -tt = rbind(pp,dd) %>% -arrange(name) %>% -select(Company, `Activity sector`, Country, Datasets, `Occurrence records`, `Data citations`) %>% -glimpse() - -# save .adoc -save_file_table = "250-private-sector-table.adoc" - -# save and more cleanup -sink(file = save_file_table, type = "output") -tt %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -# totals table -save_file_totals = "260-private-sector-totals.adoc" - -sink(file = save_file_totals, type = "output") -tt %>% -summarise( -Datasets = sum(Datasets), -`Occurrence records` = sum(`Occurrence records`), -`Data citations` = sum(`Data citations`) -) %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -} \ No newline at end of file diff --git a/.history/build-table-script/R/table_20240703151345.R b/.history/build-table-script/R/table_20240703151345.R deleted file mode 100644 index eafeb52..0000000 --- a/.history/build-table-script/R/table_20240703151345.R +++ /dev/null @@ -1,84 +0,0 @@ - -library(dplyr) -library(purrr) -library(gbifmt) # my library -# setwd("C:/Users/ftw712/Desktop/doc-private-sector-data-publishing/") - -list("dataset","organization") %>% -map(~ get_mt("privateSector.gbif.org",type=.x,limit=500)) - - -# mutate() - - -ss = readr::read_csv("build-table-script/data/source.tsv") %>% -mutate(pd = ifelse(grepl("publisher",link),"publisher","dataset")) %>% -mutate(key = gsub("https://www.gbif.org/publisher/","",link)) %>% -mutate(key = gsub("https://www.gbif.org/dataset/","",key)) %>% -glimpse() - -if(FALSE) { -gbif_country = rgbif::enumeration_country() %>% select(Country=title,iso2) %>% glimpse() - -pp = ss %>% -dplyr::filter(pd == "publisher") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingOrganization)) %>% -mutate(`Occurrence records` = map_dbl(key,~ rgbif::occ_search(publishingOrg = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = map_dbl(key,~rgbif::dataset_search(publishingOrg= .x,limit=0)$meta$count)) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(publishingOrg = .x))) %>% -mutate(Company = paste0("https://www.gbif.org/publisher/",key,"[",name,"]")) %>% -mutate(iso2 = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -glimpse() - -dd = ss %>% -dplyr::filter(pd == "dataset") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::datasets(uuid=.x,limit=1)$data$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ -rgbif::occ_search(datasetKey = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = 1) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(datasetKey = .x))) %>% -mutate(Company = paste0("(https://www.gbif.org/dataset/",key,")[",name,"]")) %>% -mutate(p_key = map_chr(key,~ rgbif::datasets(uuid=.x,limit=1)$data$publishingOrganizationKey)) %>% -mutate(iso2 = map_chr(p_key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -select(-p_key) %>% -glimpse() - -# combine tables and clean up -tt = rbind(pp,dd) %>% -arrange(name) %>% -select(Company, `Activity sector`, Country, Datasets, `Occurrence records`, `Data citations`) %>% -glimpse() - -# save .adoc -save_file_table = "250-private-sector-table.adoc" - -# save and more cleanup -sink(file = save_file_table, type = "output") -tt %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -# totals table -save_file_totals = "260-private-sector-totals.adoc" - -sink(file = save_file_totals, type = "output") -tt %>% -summarise( -Datasets = sum(Datasets), -`Occurrence records` = sum(`Occurrence records`), -`Data citations` = sum(`Data citations`) -) %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -} \ No newline at end of file diff --git a/.history/build-table-script/R/table_20240703151359.R b/.history/build-table-script/R/table_20240703151359.R deleted file mode 100644 index 89c5d48..0000000 --- a/.history/build-table-script/R/table_20240703151359.R +++ /dev/null @@ -1,85 +0,0 @@ - -library(dplyr) -library(purrr) -library(gbifmt) # my library -# setwd("C:/Users/ftw712/Desktop/doc-private-sector-data-publishing/") - -list("dataset","organization") %>% -map(~ get_mt("privateSector.gbif.org",type=.x,limit=500)) %>% -bind_rows() - - -# mutate() - - -ss = readr::read_csv("build-table-script/data/source.tsv") %>% -mutate(pd = ifelse(grepl("publisher",link),"publisher","dataset")) %>% -mutate(key = gsub("https://www.gbif.org/publisher/","",link)) %>% -mutate(key = gsub("https://www.gbif.org/dataset/","",key)) %>% -glimpse() - -if(FALSE) { -gbif_country = rgbif::enumeration_country() %>% select(Country=title,iso2) %>% glimpse() - -pp = ss %>% -dplyr::filter(pd == "publisher") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingOrganization)) %>% -mutate(`Occurrence records` = map_dbl(key,~ rgbif::occ_search(publishingOrg = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = map_dbl(key,~rgbif::dataset_search(publishingOrg= .x,limit=0)$meta$count)) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(publishingOrg = .x))) %>% -mutate(Company = paste0("https://www.gbif.org/publisher/",key,"[",name,"]")) %>% -mutate(iso2 = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -glimpse() - -dd = ss %>% -dplyr::filter(pd == "dataset") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::datasets(uuid=.x,limit=1)$data$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ -rgbif::occ_search(datasetKey = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = 1) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(datasetKey = .x))) %>% -mutate(Company = paste0("(https://www.gbif.org/dataset/",key,")[",name,"]")) %>% -mutate(p_key = map_chr(key,~ rgbif::datasets(uuid=.x,limit=1)$data$publishingOrganizationKey)) %>% -mutate(iso2 = map_chr(p_key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -select(-p_key) %>% -glimpse() - -# combine tables and clean up -tt = rbind(pp,dd) %>% -arrange(name) %>% -select(Company, `Activity sector`, Country, Datasets, `Occurrence records`, `Data citations`) %>% -glimpse() - -# save .adoc -save_file_table = "250-private-sector-table.adoc" - -# save and more cleanup -sink(file = save_file_table, type = "output") -tt %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -# totals table -save_file_totals = "260-private-sector-totals.adoc" - -sink(file = save_file_totals, type = "output") -tt %>% -summarise( -Datasets = sum(Datasets), -`Occurrence records` = sum(`Occurrence records`), -`Data citations` = sum(`Data citations`) -) %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -} \ No newline at end of file diff --git a/.history/build-table-script/R/table_20240703151427.R b/.history/build-table-script/R/table_20240703151427.R deleted file mode 100644 index 58d40c1..0000000 --- a/.history/build-table-script/R/table_20240703151427.R +++ /dev/null @@ -1,86 +0,0 @@ - -library(dplyr) -library(purrr) -library(gbifmt) # my library -# setwd("C:/Users/ftw712/Desktop/doc-private-sector-data-publishing/") - -list("dataset","organization") %>% -map(~ get_mt("privateSector.gbif.org",type=.x,limit=500)) %>% -bind_rows() |> - - - -# mutate() - - -ss = readr::read_csv("build-table-script/data/source.tsv") %>% -mutate(pd = ifelse(grepl("publisher",link),"publisher","dataset")) %>% -mutate(key = gsub("https://www.gbif.org/publisher/","",link)) %>% -mutate(key = gsub("https://www.gbif.org/dataset/","",key)) %>% -glimpse() - -if(FALSE) { -gbif_country = rgbif::enumeration_country() %>% select(Country=title,iso2) %>% glimpse() - -pp = ss %>% -dplyr::filter(pd == "publisher") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingOrganization)) %>% -mutate(`Occurrence records` = map_dbl(key,~ rgbif::occ_search(publishingOrg = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = map_dbl(key,~rgbif::dataset_search(publishingOrg= .x,limit=0)$meta$count)) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(publishingOrg = .x))) %>% -mutate(Company = paste0("https://www.gbif.org/publisher/",key,"[",name,"]")) %>% -mutate(iso2 = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -glimpse() - -dd = ss %>% -dplyr::filter(pd == "dataset") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::datasets(uuid=.x,limit=1)$data$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ -rgbif::occ_search(datasetKey = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = 1) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(datasetKey = .x))) %>% -mutate(Company = paste0("(https://www.gbif.org/dataset/",key,")[",name,"]")) %>% -mutate(p_key = map_chr(key,~ rgbif::datasets(uuid=.x,limit=1)$data$publishingOrganizationKey)) %>% -mutate(iso2 = map_chr(p_key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -select(-p_key) %>% -glimpse() - -# combine tables and clean up -tt = rbind(pp,dd) %>% -arrange(name) %>% -select(Company, `Activity sector`, Country, Datasets, `Occurrence records`, `Data citations`) %>% -glimpse() - -# save .adoc -save_file_table = "250-private-sector-table.adoc" - -# save and more cleanup -sink(file = save_file_table, type = "output") -tt %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -# totals table -save_file_totals = "260-private-sector-totals.adoc" - -sink(file = save_file_totals, type = "output") -tt %>% -summarise( -Datasets = sum(Datasets), -`Occurrence records` = sum(`Occurrence records`), -`Data citations` = sum(`Data citations`) -) %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -} \ No newline at end of file diff --git a/.history/build-table-script/R/table_20240703151446.R b/.history/build-table-script/R/table_20240703151446.R deleted file mode 100644 index c4ddb59..0000000 --- a/.history/build-table-script/R/table_20240703151446.R +++ /dev/null @@ -1,86 +0,0 @@ - -library(dplyr) -library(purrr) -library(gbifmt) # my library -# setwd("C:/Users/ftw712/Desktop/doc-private-sector-data-publishing/") - -list("dataset","organization") %>% -map(~ get_mt("privateSector.gbif.org",type=.x,limit=500)) %>% -bind_rows() %>% - - - -# mutate() - - -ss = readr::read_csv("build-table-script/data/source.tsv") %>% -mutate(pd = ifelse(grepl("publisher",link),"publisher","dataset")) %>% -mutate(key = gsub("https://www.gbif.org/publisher/","",link)) %>% -mutate(key = gsub("https://www.gbif.org/dataset/","",key)) %>% -glimpse() - -if(FALSE) { -gbif_country = rgbif::enumeration_country() %>% select(Country=title,iso2) %>% glimpse() - -pp = ss %>% -dplyr::filter(pd == "publisher") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingOrganization)) %>% -mutate(`Occurrence records` = map_dbl(key,~ rgbif::occ_search(publishingOrg = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = map_dbl(key,~rgbif::dataset_search(publishingOrg= .x,limit=0)$meta$count)) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(publishingOrg = .x))) %>% -mutate(Company = paste0("https://www.gbif.org/publisher/",key,"[",name,"]")) %>% -mutate(iso2 = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -glimpse() - -dd = ss %>% -dplyr::filter(pd == "dataset") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::datasets(uuid=.x,limit=1)$data$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ -rgbif::occ_search(datasetKey = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = 1) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(datasetKey = .x))) %>% -mutate(Company = paste0("(https://www.gbif.org/dataset/",key,")[",name,"]")) %>% -mutate(p_key = map_chr(key,~ rgbif::datasets(uuid=.x,limit=1)$data$publishingOrganizationKey)) %>% -mutate(iso2 = map_chr(p_key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -select(-p_key) %>% -glimpse() - -# combine tables and clean up -tt = rbind(pp,dd) %>% -arrange(name) %>% -select(Company, `Activity sector`, Country, Datasets, `Occurrence records`, `Data citations`) %>% -glimpse() - -# save .adoc -save_file_table = "250-private-sector-table.adoc" - -# save and more cleanup -sink(file = save_file_table, type = "output") -tt %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -# totals table -save_file_totals = "260-private-sector-totals.adoc" - -sink(file = save_file_totals, type = "output") -tt %>% -summarise( -Datasets = sum(Datasets), -`Occurrence records` = sum(`Occurrence records`), -`Data citations` = sum(`Data citations`) -) %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -} \ No newline at end of file diff --git a/.history/build-table-script/R/table_20240703151447.R b/.history/build-table-script/R/table_20240703151447.R deleted file mode 100644 index c4ddb59..0000000 --- a/.history/build-table-script/R/table_20240703151447.R +++ /dev/null @@ -1,86 +0,0 @@ - -library(dplyr) -library(purrr) -library(gbifmt) # my library -# setwd("C:/Users/ftw712/Desktop/doc-private-sector-data-publishing/") - -list("dataset","organization") %>% -map(~ get_mt("privateSector.gbif.org",type=.x,limit=500)) %>% -bind_rows() %>% - - - -# mutate() - - -ss = readr::read_csv("build-table-script/data/source.tsv") %>% -mutate(pd = ifelse(grepl("publisher",link),"publisher","dataset")) %>% -mutate(key = gsub("https://www.gbif.org/publisher/","",link)) %>% -mutate(key = gsub("https://www.gbif.org/dataset/","",key)) %>% -glimpse() - -if(FALSE) { -gbif_country = rgbif::enumeration_country() %>% select(Country=title,iso2) %>% glimpse() - -pp = ss %>% -dplyr::filter(pd == "publisher") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingOrganization)) %>% -mutate(`Occurrence records` = map_dbl(key,~ rgbif::occ_search(publishingOrg = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = map_dbl(key,~rgbif::dataset_search(publishingOrg= .x,limit=0)$meta$count)) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(publishingOrg = .x))) %>% -mutate(Company = paste0("https://www.gbif.org/publisher/",key,"[",name,"]")) %>% -mutate(iso2 = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -glimpse() - -dd = ss %>% -dplyr::filter(pd == "dataset") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::datasets(uuid=.x,limit=1)$data$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ -rgbif::occ_search(datasetKey = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = 1) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(datasetKey = .x))) %>% -mutate(Company = paste0("(https://www.gbif.org/dataset/",key,")[",name,"]")) %>% -mutate(p_key = map_chr(key,~ rgbif::datasets(uuid=.x,limit=1)$data$publishingOrganizationKey)) %>% -mutate(iso2 = map_chr(p_key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -select(-p_key) %>% -glimpse() - -# combine tables and clean up -tt = rbind(pp,dd) %>% -arrange(name) %>% -select(Company, `Activity sector`, Country, Datasets, `Occurrence records`, `Data citations`) %>% -glimpse() - -# save .adoc -save_file_table = "250-private-sector-table.adoc" - -# save and more cleanup -sink(file = save_file_table, type = "output") -tt %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -# totals table -save_file_totals = "260-private-sector-totals.adoc" - -sink(file = save_file_totals, type = "output") -tt %>% -summarise( -Datasets = sum(Datasets), -`Occurrence records` = sum(`Occurrence records`), -`Data citations` = sum(`Data citations`) -) %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -} \ No newline at end of file diff --git a/.history/build-table-script/R/table_20240703151649.R b/.history/build-table-script/R/table_20240703151649.R deleted file mode 100644 index 85427eb..0000000 --- a/.history/build-table-script/R/table_20240703151649.R +++ /dev/null @@ -1,86 +0,0 @@ - -library(dplyr) -library(purrr) -library(gbifmt) # my library -# setwd("C:/Users/ftw712/Desktop/doc-private-sector-data-publishing/") - -list("dataset","organization") %>% -map(~ get_mt("privateSector.gbif.org",type=.x,limit=500)) %>% -bind_rows() - - - -# mutate() - - -ss = readr::read_csv("build-table-script/data/source.tsv") %>% -mutate(pd = ifelse(grepl("publisher",link),"publisher","dataset")) %>% -mutate(key = gsub("https://www.gbif.org/publisher/","",link)) %>% -mutate(key = gsub("https://www.gbif.org/dataset/","",key)) %>% -glimpse() - -if(FALSE) { -gbif_country = rgbif::enumeration_country() %>% select(Country=title,iso2) %>% glimpse() - -pp = ss %>% -dplyr::filter(pd == "publisher") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingOrganization)) %>% -mutate(`Occurrence records` = map_dbl(key,~ rgbif::occ_search(publishingOrg = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = map_dbl(key,~rgbif::dataset_search(publishingOrg= .x,limit=0)$meta$count)) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(publishingOrg = .x))) %>% -mutate(Company = paste0("https://www.gbif.org/publisher/",key,"[",name,"]")) %>% -mutate(iso2 = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -glimpse() - -dd = ss %>% -dplyr::filter(pd == "dataset") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::datasets(uuid=.x,limit=1)$data$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ -rgbif::occ_search(datasetKey = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = 1) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(datasetKey = .x))) %>% -mutate(Company = paste0("(https://www.gbif.org/dataset/",key,")[",name,"]")) %>% -mutate(p_key = map_chr(key,~ rgbif::datasets(uuid=.x,limit=1)$data$publishingOrganizationKey)) %>% -mutate(iso2 = map_chr(p_key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -select(-p_key) %>% -glimpse() - -# combine tables and clean up -tt = rbind(pp,dd) %>% -arrange(name) %>% -select(Company, `Activity sector`, Country, Datasets, `Occurrence records`, `Data citations`) %>% -glimpse() - -# save .adoc -save_file_table = "250-private-sector-table.adoc" - -# save and more cleanup -sink(file = save_file_table, type = "output") -tt %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -# totals table -save_file_totals = "260-private-sector-totals.adoc" - -sink(file = save_file_totals, type = "output") -tt %>% -summarise( -Datasets = sum(Datasets), -`Occurrence records` = sum(`Occurrence records`), -`Data citations` = sum(`Data citations`) -) %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -} \ No newline at end of file diff --git a/.history/build-table-script/R/table_20240703151658.R b/.history/build-table-script/R/table_20240703151658.R deleted file mode 100644 index a398194..0000000 --- a/.history/build-table-script/R/table_20240703151658.R +++ /dev/null @@ -1,84 +0,0 @@ - -library(dplyr) -library(purrr) -library(gbifmt) # my library -# setwd("C:/Users/ftw712/Desktop/doc-private-sector-data-publishing/") - -list("dataset","organization") %>% -map(~ get_mt("privateSector.gbif.org",type=.x,limit=500)) %>% -bind_rows() - -# mutate() - - -ss = readr::read_csv("build-table-script/data/source.tsv") %>% -mutate(pd = ifelse(grepl("publisher",link),"publisher","dataset")) %>% -mutate(key = gsub("https://www.gbif.org/publisher/","",link)) %>% -mutate(key = gsub("https://www.gbif.org/dataset/","",key)) %>% -glimpse() - -if(FALSE) { -gbif_country = rgbif::enumeration_country() %>% select(Country=title,iso2) %>% glimpse() - -pp = ss %>% -dplyr::filter(pd == "publisher") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingOrganization)) %>% -mutate(`Occurrence records` = map_dbl(key,~ rgbif::occ_search(publishingOrg = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = map_dbl(key,~rgbif::dataset_search(publishingOrg= .x,limit=0)$meta$count)) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(publishingOrg = .x))) %>% -mutate(Company = paste0("https://www.gbif.org/publisher/",key,"[",name,"]")) %>% -mutate(iso2 = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -glimpse() - -dd = ss %>% -dplyr::filter(pd == "dataset") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::datasets(uuid=.x,limit=1)$data$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ -rgbif::occ_search(datasetKey = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = 1) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(datasetKey = .x))) %>% -mutate(Company = paste0("(https://www.gbif.org/dataset/",key,")[",name,"]")) %>% -mutate(p_key = map_chr(key,~ rgbif::datasets(uuid=.x,limit=1)$data$publishingOrganizationKey)) %>% -mutate(iso2 = map_chr(p_key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -select(-p_key) %>% -glimpse() - -# combine tables and clean up -tt = rbind(pp,dd) %>% -arrange(name) %>% -select(Company, `Activity sector`, Country, Datasets, `Occurrence records`, `Data citations`) %>% -glimpse() - -# save .adoc -save_file_table = "250-private-sector-table.adoc" - -# save and more cleanup -sink(file = save_file_table, type = "output") -tt %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -# totals table -save_file_totals = "260-private-sector-totals.adoc" - -sink(file = save_file_totals, type = "output") -tt %>% -summarise( -Datasets = sum(Datasets), -`Occurrence records` = sum(`Occurrence records`), -`Data citations` = sum(`Data citations`) -) %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -} \ No newline at end of file diff --git a/.history/build-table-script/R/table_20240703151756.R b/.history/build-table-script/R/table_20240703151756.R deleted file mode 100644 index 05fb4c2..0000000 --- a/.history/build-table-script/R/table_20240703151756.R +++ /dev/null @@ -1,86 +0,0 @@ - -library(dplyr) -library(purrr) -library(gbifmt) # my library -# setwd("C:/Users/ftw712/Desktop/doc-private-sector-data-publishing/") - -# harvest publishers and datasets from machineTags -list("dataset","organization") %>% -map(~ get_mt("privateSector.gbif.org",type=.x,limit=500)) %>% -bind_rows() |> - - -# mutate() - - -ss = readr::read_csv("build-table-script/data/source.tsv") %>% -mutate(pd = ifelse(grepl("publisher",link),"publisher","dataset")) %>% -mutate(key = gsub("https://www.gbif.org/publisher/","",link)) %>% -mutate(key = gsub("https://www.gbif.org/dataset/","",key)) %>% -glimpse() - -if(FALSE) { -gbif_country = rgbif::enumeration_country() %>% select(Country=title,iso2) %>% glimpse() - -pp = ss %>% -dplyr::filter(pd == "publisher") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingOrganization)) %>% -mutate(`Occurrence records` = map_dbl(key,~ rgbif::occ_search(publishingOrg = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = map_dbl(key,~rgbif::dataset_search(publishingOrg= .x,limit=0)$meta$count)) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(publishingOrg = .x))) %>% -mutate(Company = paste0("https://www.gbif.org/publisher/",key,"[",name,"]")) %>% -mutate(iso2 = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -glimpse() - -dd = ss %>% -dplyr::filter(pd == "dataset") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::datasets(uuid=.x,limit=1)$data$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ -rgbif::occ_search(datasetKey = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = 1) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(datasetKey = .x))) %>% -mutate(Company = paste0("(https://www.gbif.org/dataset/",key,")[",name,"]")) %>% -mutate(p_key = map_chr(key,~ rgbif::datasets(uuid=.x,limit=1)$data$publishingOrganizationKey)) %>% -mutate(iso2 = map_chr(p_key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -select(-p_key) %>% -glimpse() - -# combine tables and clean up -tt = rbind(pp,dd) %>% -arrange(name) %>% -select(Company, `Activity sector`, Country, Datasets, `Occurrence records`, `Data citations`) %>% -glimpse() - -# save .adoc -save_file_table = "250-private-sector-table.adoc" - -# save and more cleanup -sink(file = save_file_table, type = "output") -tt %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -# totals table -save_file_totals = "260-private-sector-totals.adoc" - -sink(file = save_file_totals, type = "output") -tt %>% -summarise( -Datasets = sum(Datasets), -`Occurrence records` = sum(`Occurrence records`), -`Data citations` = sum(`Data citations`) -) %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -} \ No newline at end of file diff --git a/.history/build-table-script/R/table_20240703151802.R b/.history/build-table-script/R/table_20240703151802.R deleted file mode 100644 index 57c00b0..0000000 --- a/.history/build-table-script/R/table_20240703151802.R +++ /dev/null @@ -1,86 +0,0 @@ - -library(dplyr) -library(purrr) -library(gbifmt) # my library -# setwd("C:/Users/ftw712/Desktop/doc-private-sector-data-publishing/") - -# harvest publishers and datasets from machineTags -list("dataset","organization") %>% -map(~ get_mt("privateSector.gbif.org",type=.x,limit=500)) %>% -bind_rows() %>% - - -# mutate() - - -ss = readr::read_csv("build-table-script/data/source.tsv") %>% -mutate(pd = ifelse(grepl("publisher",link),"publisher","dataset")) %>% -mutate(key = gsub("https://www.gbif.org/publisher/","",link)) %>% -mutate(key = gsub("https://www.gbif.org/dataset/","",key)) %>% -glimpse() - -if(FALSE) { -gbif_country = rgbif::enumeration_country() %>% select(Country=title,iso2) %>% glimpse() - -pp = ss %>% -dplyr::filter(pd == "publisher") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingOrganization)) %>% -mutate(`Occurrence records` = map_dbl(key,~ rgbif::occ_search(publishingOrg = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = map_dbl(key,~rgbif::dataset_search(publishingOrg= .x,limit=0)$meta$count)) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(publishingOrg = .x))) %>% -mutate(Company = paste0("https://www.gbif.org/publisher/",key,"[",name,"]")) %>% -mutate(iso2 = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -glimpse() - -dd = ss %>% -dplyr::filter(pd == "dataset") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::datasets(uuid=.x,limit=1)$data$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ -rgbif::occ_search(datasetKey = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = 1) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(datasetKey = .x))) %>% -mutate(Company = paste0("(https://www.gbif.org/dataset/",key,")[",name,"]")) %>% -mutate(p_key = map_chr(key,~ rgbif::datasets(uuid=.x,limit=1)$data$publishingOrganizationKey)) %>% -mutate(iso2 = map_chr(p_key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -select(-p_key) %>% -glimpse() - -# combine tables and clean up -tt = rbind(pp,dd) %>% -arrange(name) %>% -select(Company, `Activity sector`, Country, Datasets, `Occurrence records`, `Data citations`) %>% -glimpse() - -# save .adoc -save_file_table = "250-private-sector-table.adoc" - -# save and more cleanup -sink(file = save_file_table, type = "output") -tt %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -# totals table -save_file_totals = "260-private-sector-totals.adoc" - -sink(file = save_file_totals, type = "output") -tt %>% -summarise( -Datasets = sum(Datasets), -`Occurrence records` = sum(`Occurrence records`), -`Data citations` = sum(`Data citations`) -) %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -} \ No newline at end of file diff --git a/.history/build-table-script/R/table_20240703152128.R b/.history/build-table-script/R/table_20240703152128.R deleted file mode 100644 index 1b85fc6..0000000 --- a/.history/build-table-script/R/table_20240703152128.R +++ /dev/null @@ -1,86 +0,0 @@ - -library(dplyr) -library(purrr) -library(gbifmt) # my library -# setwd("C:/Users/ftw712/Desktop/doc-private-sector-data-publishing/") - -# harvest publishers and datasets from machineTags -list("dataset","organization") %>% -map(~ get_mt("privateSector.gbif.org",type=.x,limit=500)) %>% -bind_rows() %>% -mutate(link = gsub("https://www.gbif.org/publisher/",uuid)) - -# mutate() - - -ss = readr::read_csv("build-table-script/data/source.tsv") %>% -mutate(pd = ifelse(grepl("publisher",link),"publisher","dataset")) %>% -mutate(key = gsub("https://www.gbif.org/publisher/","",link)) %>% -mutate(key = gsub("https://www.gbif.org/dataset/","",key)) %>% -glimpse() - -if(FALSE) { -gbif_country = rgbif::enumeration_country() %>% select(Country=title,iso2) %>% glimpse() - -pp = ss %>% -dplyr::filter(pd == "publisher") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingOrganization)) %>% -mutate(`Occurrence records` = map_dbl(key,~ rgbif::occ_search(publishingOrg = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = map_dbl(key,~rgbif::dataset_search(publishingOrg= .x,limit=0)$meta$count)) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(publishingOrg = .x))) %>% -mutate(Company = paste0("https://www.gbif.org/publisher/",key,"[",name,"]")) %>% -mutate(iso2 = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -glimpse() - -dd = ss %>% -dplyr::filter(pd == "dataset") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::datasets(uuid=.x,limit=1)$data$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ -rgbif::occ_search(datasetKey = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = 1) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(datasetKey = .x))) %>% -mutate(Company = paste0("(https://www.gbif.org/dataset/",key,")[",name,"]")) %>% -mutate(p_key = map_chr(key,~ rgbif::datasets(uuid=.x,limit=1)$data$publishingOrganizationKey)) %>% -mutate(iso2 = map_chr(p_key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -select(-p_key) %>% -glimpse() - -# combine tables and clean up -tt = rbind(pp,dd) %>% -arrange(name) %>% -select(Company, `Activity sector`, Country, Datasets, `Occurrence records`, `Data citations`) %>% -glimpse() - -# save .adoc -save_file_table = "250-private-sector-table.adoc" - -# save and more cleanup -sink(file = save_file_table, type = "output") -tt %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -# totals table -save_file_totals = "260-private-sector-totals.adoc" - -sink(file = save_file_totals, type = "output") -tt %>% -summarise( -Datasets = sum(Datasets), -`Occurrence records` = sum(`Occurrence records`), -`Data citations` = sum(`Data citations`) -) %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -} \ No newline at end of file diff --git a/.history/build-table-script/R/table_20240703152158.R b/.history/build-table-script/R/table_20240703152158.R deleted file mode 100644 index 63e3f0a..0000000 --- a/.history/build-table-script/R/table_20240703152158.R +++ /dev/null @@ -1,87 +0,0 @@ - -library(dplyr) -library(purrr) -library(gbifmt) # my library -# setwd("C:/Users/ftw712/Desktop/doc-private-sector-data-publishing/") - -# harvest publishers and datasets from machineTags -list("dataset","organization") %>% -map(~ get_mt("privateSector.gbif.org",type=.x,limit=500) %>% -mutate(type=.x)) %>% -bind_rows() %>% -mutate(link = gsub("https://www.gbif.org/publisher/",uuid)) - -# mutate() - - -ss = readr::read_csv("build-table-script/data/source.tsv") %>% -mutate(pd = ifelse(grepl("publisher",link),"publisher","dataset")) %>% -mutate(key = gsub("https://www.gbif.org/publisher/","",link)) %>% -mutate(key = gsub("https://www.gbif.org/dataset/","",key)) %>% -glimpse() - -if(FALSE) { -gbif_country = rgbif::enumeration_country() %>% select(Country=title,iso2) %>% glimpse() - -pp = ss %>% -dplyr::filter(pd == "publisher") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingOrganization)) %>% -mutate(`Occurrence records` = map_dbl(key,~ rgbif::occ_search(publishingOrg = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = map_dbl(key,~rgbif::dataset_search(publishingOrg= .x,limit=0)$meta$count)) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(publishingOrg = .x))) %>% -mutate(Company = paste0("https://www.gbif.org/publisher/",key,"[",name,"]")) %>% -mutate(iso2 = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -glimpse() - -dd = ss %>% -dplyr::filter(pd == "dataset") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::datasets(uuid=.x,limit=1)$data$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ -rgbif::occ_search(datasetKey = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = 1) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(datasetKey = .x))) %>% -mutate(Company = paste0("(https://www.gbif.org/dataset/",key,")[",name,"]")) %>% -mutate(p_key = map_chr(key,~ rgbif::datasets(uuid=.x,limit=1)$data$publishingOrganizationKey)) %>% -mutate(iso2 = map_chr(p_key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -select(-p_key) %>% -glimpse() - -# combine tables and clean up -tt = rbind(pp,dd) %>% -arrange(name) %>% -select(Company, `Activity sector`, Country, Datasets, `Occurrence records`, `Data citations`) %>% -glimpse() - -# save .adoc -save_file_table = "250-private-sector-table.adoc" - -# save and more cleanup -sink(file = save_file_table, type = "output") -tt %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -# totals table -save_file_totals = "260-private-sector-totals.adoc" - -sink(file = save_file_totals, type = "output") -tt %>% -summarise( -Datasets = sum(Datasets), -`Occurrence records` = sum(`Occurrence records`), -`Data citations` = sum(`Data citations`) -) %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -} \ No newline at end of file diff --git a/.history/build-table-script/R/table_20240703152200.R b/.history/build-table-script/R/table_20240703152200.R deleted file mode 100644 index 63e3f0a..0000000 --- a/.history/build-table-script/R/table_20240703152200.R +++ /dev/null @@ -1,87 +0,0 @@ - -library(dplyr) -library(purrr) -library(gbifmt) # my library -# setwd("C:/Users/ftw712/Desktop/doc-private-sector-data-publishing/") - -# harvest publishers and datasets from machineTags -list("dataset","organization") %>% -map(~ get_mt("privateSector.gbif.org",type=.x,limit=500) %>% -mutate(type=.x)) %>% -bind_rows() %>% -mutate(link = gsub("https://www.gbif.org/publisher/",uuid)) - -# mutate() - - -ss = readr::read_csv("build-table-script/data/source.tsv") %>% -mutate(pd = ifelse(grepl("publisher",link),"publisher","dataset")) %>% -mutate(key = gsub("https://www.gbif.org/publisher/","",link)) %>% -mutate(key = gsub("https://www.gbif.org/dataset/","",key)) %>% -glimpse() - -if(FALSE) { -gbif_country = rgbif::enumeration_country() %>% select(Country=title,iso2) %>% glimpse() - -pp = ss %>% -dplyr::filter(pd == "publisher") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingOrganization)) %>% -mutate(`Occurrence records` = map_dbl(key,~ rgbif::occ_search(publishingOrg = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = map_dbl(key,~rgbif::dataset_search(publishingOrg= .x,limit=0)$meta$count)) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(publishingOrg = .x))) %>% -mutate(Company = paste0("https://www.gbif.org/publisher/",key,"[",name,"]")) %>% -mutate(iso2 = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -glimpse() - -dd = ss %>% -dplyr::filter(pd == "dataset") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::datasets(uuid=.x,limit=1)$data$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ -rgbif::occ_search(datasetKey = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = 1) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(datasetKey = .x))) %>% -mutate(Company = paste0("(https://www.gbif.org/dataset/",key,")[",name,"]")) %>% -mutate(p_key = map_chr(key,~ rgbif::datasets(uuid=.x,limit=1)$data$publishingOrganizationKey)) %>% -mutate(iso2 = map_chr(p_key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -select(-p_key) %>% -glimpse() - -# combine tables and clean up -tt = rbind(pp,dd) %>% -arrange(name) %>% -select(Company, `Activity sector`, Country, Datasets, `Occurrence records`, `Data citations`) %>% -glimpse() - -# save .adoc -save_file_table = "250-private-sector-table.adoc" - -# save and more cleanup -sink(file = save_file_table, type = "output") -tt %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -# totals table -save_file_totals = "260-private-sector-totals.adoc" - -sink(file = save_file_totals, type = "output") -tt %>% -summarise( -Datasets = sum(Datasets), -`Occurrence records` = sum(`Occurrence records`), -`Data citations` = sum(`Data citations`) -) %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -} \ No newline at end of file diff --git a/.history/build-table-script/R/table_20240703152202.R b/.history/build-table-script/R/table_20240703152202.R deleted file mode 100644 index 63e3f0a..0000000 --- a/.history/build-table-script/R/table_20240703152202.R +++ /dev/null @@ -1,87 +0,0 @@ - -library(dplyr) -library(purrr) -library(gbifmt) # my library -# setwd("C:/Users/ftw712/Desktop/doc-private-sector-data-publishing/") - -# harvest publishers and datasets from machineTags -list("dataset","organization") %>% -map(~ get_mt("privateSector.gbif.org",type=.x,limit=500) %>% -mutate(type=.x)) %>% -bind_rows() %>% -mutate(link = gsub("https://www.gbif.org/publisher/",uuid)) - -# mutate() - - -ss = readr::read_csv("build-table-script/data/source.tsv") %>% -mutate(pd = ifelse(grepl("publisher",link),"publisher","dataset")) %>% -mutate(key = gsub("https://www.gbif.org/publisher/","",link)) %>% -mutate(key = gsub("https://www.gbif.org/dataset/","",key)) %>% -glimpse() - -if(FALSE) { -gbif_country = rgbif::enumeration_country() %>% select(Country=title,iso2) %>% glimpse() - -pp = ss %>% -dplyr::filter(pd == "publisher") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingOrganization)) %>% -mutate(`Occurrence records` = map_dbl(key,~ rgbif::occ_search(publishingOrg = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = map_dbl(key,~rgbif::dataset_search(publishingOrg= .x,limit=0)$meta$count)) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(publishingOrg = .x))) %>% -mutate(Company = paste0("https://www.gbif.org/publisher/",key,"[",name,"]")) %>% -mutate(iso2 = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -glimpse() - -dd = ss %>% -dplyr::filter(pd == "dataset") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::datasets(uuid=.x,limit=1)$data$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ -rgbif::occ_search(datasetKey = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = 1) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(datasetKey = .x))) %>% -mutate(Company = paste0("(https://www.gbif.org/dataset/",key,")[",name,"]")) %>% -mutate(p_key = map_chr(key,~ rgbif::datasets(uuid=.x,limit=1)$data$publishingOrganizationKey)) %>% -mutate(iso2 = map_chr(p_key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -select(-p_key) %>% -glimpse() - -# combine tables and clean up -tt = rbind(pp,dd) %>% -arrange(name) %>% -select(Company, `Activity sector`, Country, Datasets, `Occurrence records`, `Data citations`) %>% -glimpse() - -# save .adoc -save_file_table = "250-private-sector-table.adoc" - -# save and more cleanup -sink(file = save_file_table, type = "output") -tt %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -# totals table -save_file_totals = "260-private-sector-totals.adoc" - -sink(file = save_file_totals, type = "output") -tt %>% -summarise( -Datasets = sum(Datasets), -`Occurrence records` = sum(`Occurrence records`), -`Data citations` = sum(`Data citations`) -) %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -} \ No newline at end of file diff --git a/.history/build-table-script/R/table_20240703152223.R b/.history/build-table-script/R/table_20240703152223.R deleted file mode 100644 index 2d46f45..0000000 --- a/.history/build-table-script/R/table_20240703152223.R +++ /dev/null @@ -1,89 +0,0 @@ - -library(dplyr) -library(purrr) -library(gbifmt) # my library -# setwd("C:/Users/ftw712/Desktop/doc-private-sector-data-publishing/") - -# harvest publishers and datasets from machineTags -list("dataset","organization") %>% -map(~ -get_mt("privateSector.gbif.org",type=.x,limit=500) %>% -mutate(link = paste("https://www.gbif.org/publisher/",uuid)) -mutate(type=.x) -) %>% -bind_rows() %>% - -# mutate() - - -ss = readr::read_csv("build-table-script/data/source.tsv") %>% -mutate(pd = ifelse(grepl("publisher",link),"publisher","dataset")) %>% -mutate(key = gsub("https://www.gbif.org/publisher/","",link)) %>% -mutate(key = gsub("https://www.gbif.org/dataset/","",key)) %>% -glimpse() - -if(FALSE) { -gbif_country = rgbif::enumeration_country() %>% select(Country=title,iso2) %>% glimpse() - -pp = ss %>% -dplyr::filter(pd == "publisher") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingOrganization)) %>% -mutate(`Occurrence records` = map_dbl(key,~ rgbif::occ_search(publishingOrg = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = map_dbl(key,~rgbif::dataset_search(publishingOrg= .x,limit=0)$meta$count)) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(publishingOrg = .x))) %>% -mutate(Company = paste0("https://www.gbif.org/publisher/",key,"[",name,"]")) %>% -mutate(iso2 = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -glimpse() - -dd = ss %>% -dplyr::filter(pd == "dataset") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::datasets(uuid=.x,limit=1)$data$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ -rgbif::occ_search(datasetKey = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = 1) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(datasetKey = .x))) %>% -mutate(Company = paste0("(https://www.gbif.org/dataset/",key,")[",name,"]")) %>% -mutate(p_key = map_chr(key,~ rgbif::datasets(uuid=.x,limit=1)$data$publishingOrganizationKey)) %>% -mutate(iso2 = map_chr(p_key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -select(-p_key) %>% -glimpse() - -# combine tables and clean up -tt = rbind(pp,dd) %>% -arrange(name) %>% -select(Company, `Activity sector`, Country, Datasets, `Occurrence records`, `Data citations`) %>% -glimpse() - -# save .adoc -save_file_table = "250-private-sector-table.adoc" - -# save and more cleanup -sink(file = save_file_table, type = "output") -tt %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -# totals table -save_file_totals = "260-private-sector-totals.adoc" - -sink(file = save_file_totals, type = "output") -tt %>% -summarise( -Datasets = sum(Datasets), -`Occurrence records` = sum(`Occurrence records`), -`Data citations` = sum(`Data citations`) -) %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -} \ No newline at end of file diff --git a/.history/build-table-script/R/table_20240703152232.R b/.history/build-table-script/R/table_20240703152232.R deleted file mode 100644 index 3f5fcd4..0000000 --- a/.history/build-table-script/R/table_20240703152232.R +++ /dev/null @@ -1,89 +0,0 @@ - -library(dplyr) -library(purrr) -library(gbifmt) # my library -# setwd("C:/Users/ftw712/Desktop/doc-private-sector-data-publishing/") - -# harvest publishers and datasets from machineTags -list("dataset","organization") %>% -map(~ -get_mt("privateSector.gbif.org",type=.x,limit=500) %>% -mutate(link = paste0("https://www.gbif.org/publisher/",uuid)) -mutate(type=.x) -) %>% -bind_rows() %>% - -# mutate() - - -ss = readr::read_csv("build-table-script/data/source.tsv") %>% -mutate(pd = ifelse(grepl("publisher",link),"publisher","dataset")) %>% -mutate(key = gsub("https://www.gbif.org/publisher/","",link)) %>% -mutate(key = gsub("https://www.gbif.org/dataset/","",key)) %>% -glimpse() - -if(FALSE) { -gbif_country = rgbif::enumeration_country() %>% select(Country=title,iso2) %>% glimpse() - -pp = ss %>% -dplyr::filter(pd == "publisher") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingOrganization)) %>% -mutate(`Occurrence records` = map_dbl(key,~ rgbif::occ_search(publishingOrg = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = map_dbl(key,~rgbif::dataset_search(publishingOrg= .x,limit=0)$meta$count)) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(publishingOrg = .x))) %>% -mutate(Company = paste0("https://www.gbif.org/publisher/",key,"[",name,"]")) %>% -mutate(iso2 = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -glimpse() - -dd = ss %>% -dplyr::filter(pd == "dataset") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::datasets(uuid=.x,limit=1)$data$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ -rgbif::occ_search(datasetKey = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = 1) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(datasetKey = .x))) %>% -mutate(Company = paste0("(https://www.gbif.org/dataset/",key,")[",name,"]")) %>% -mutate(p_key = map_chr(key,~ rgbif::datasets(uuid=.x,limit=1)$data$publishingOrganizationKey)) %>% -mutate(iso2 = map_chr(p_key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -select(-p_key) %>% -glimpse() - -# combine tables and clean up -tt = rbind(pp,dd) %>% -arrange(name) %>% -select(Company, `Activity sector`, Country, Datasets, `Occurrence records`, `Data citations`) %>% -glimpse() - -# save .adoc -save_file_table = "250-private-sector-table.adoc" - -# save and more cleanup -sink(file = save_file_table, type = "output") -tt %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -# totals table -save_file_totals = "260-private-sector-totals.adoc" - -sink(file = save_file_totals, type = "output") -tt %>% -summarise( -Datasets = sum(Datasets), -`Occurrence records` = sum(`Occurrence records`), -`Data citations` = sum(`Data citations`) -) %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -} \ No newline at end of file diff --git a/.history/build-table-script/R/table_20240703152251.R b/.history/build-table-script/R/table_20240703152251.R deleted file mode 100644 index 835177b..0000000 --- a/.history/build-table-script/R/table_20240703152251.R +++ /dev/null @@ -1,89 +0,0 @@ - -library(dplyr) -library(purrr) -library(gbifmt) # my library -# setwd("C:/Users/ftw712/Desktop/doc-private-sector-data-publishing/") - -# harvest publishers and datasets from machineTags -list("dataset","organization") %>% -map(~ -get_mt("privateSector.gbif.org",type=.x,limit=500) %>% -mutate(link = paste0("https://www.gbif.org/publisher/",uuid)) -mutate(pd=.x) -) %>% -bind_rows() %>% - -# mutate() - - -ss = readr::read_csv("build-table-script/data/source.tsv") %>% -mutate(pd = ifelse(grepl("publisher",link),"publisher","dataset")) %>% -mutate(key = gsub("https://www.gbif.org/publisher/","",link)) %>% -mutate(key = gsub("https://www.gbif.org/dataset/","",key)) %>% -glimpse() - -if(FALSE) { -gbif_country = rgbif::enumeration_country() %>% select(Country=title,iso2) %>% glimpse() - -pp = ss %>% -dplyr::filter(pd == "publisher") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingOrganization)) %>% -mutate(`Occurrence records` = map_dbl(key,~ rgbif::occ_search(publishingOrg = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = map_dbl(key,~rgbif::dataset_search(publishingOrg= .x,limit=0)$meta$count)) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(publishingOrg = .x))) %>% -mutate(Company = paste0("https://www.gbif.org/publisher/",key,"[",name,"]")) %>% -mutate(iso2 = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -glimpse() - -dd = ss %>% -dplyr::filter(pd == "dataset") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::datasets(uuid=.x,limit=1)$data$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ -rgbif::occ_search(datasetKey = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = 1) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(datasetKey = .x))) %>% -mutate(Company = paste0("(https://www.gbif.org/dataset/",key,")[",name,"]")) %>% -mutate(p_key = map_chr(key,~ rgbif::datasets(uuid=.x,limit=1)$data$publishingOrganizationKey)) %>% -mutate(iso2 = map_chr(p_key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -select(-p_key) %>% -glimpse() - -# combine tables and clean up -tt = rbind(pp,dd) %>% -arrange(name) %>% -select(Company, `Activity sector`, Country, Datasets, `Occurrence records`, `Data citations`) %>% -glimpse() - -# save .adoc -save_file_table = "250-private-sector-table.adoc" - -# save and more cleanup -sink(file = save_file_table, type = "output") -tt %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -# totals table -save_file_totals = "260-private-sector-totals.adoc" - -sink(file = save_file_totals, type = "output") -tt %>% -summarise( -Datasets = sum(Datasets), -`Occurrence records` = sum(`Occurrence records`), -`Data citations` = sum(`Data citations`) -) %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -} \ No newline at end of file diff --git a/.history/build-table-script/R/table_20240704101656.R b/.history/build-table-script/R/table_20240704101656.R deleted file mode 100644 index 74ac219..0000000 --- a/.history/build-table-script/R/table_20240704101656.R +++ /dev/null @@ -1,89 +0,0 @@ - -library(dplyr) -library(purrr) -library(gbifmt) # my library -# setwd("C:/Users/ftw712/Desktop/doc-private-sector-data-publishing/") - -# harvest publishers and datasets from machineTags -list("dataset","organization") %>% -map(~ -get_mt("privateSector.gbif.org",type=.x,limit=500) %>% -mutate(link = paste0("https://www.gbif.org/publisher/",uuid)) -mutate(pd=.x) -) %>% -bind_rows() %>% -glimpse() -# mutate() - - -ss = readr::read_csv("build-table-script/data/source.tsv") %>% -mutate(pd = ifelse(grepl("publisher",link),"publisher","dataset")) %>% -mutate(key = gsub("https://www.gbif.org/publisher/","",link)) %>% -mutate(key = gsub("https://www.gbif.org/dataset/","",key)) %>% -glimpse() - -if(FALSE) { -gbif_country = rgbif::enumeration_country() %>% select(Country=title,iso2) %>% glimpse() - -pp = ss %>% -dplyr::filter(pd == "publisher") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingOrganization)) %>% -mutate(`Occurrence records` = map_dbl(key,~ rgbif::occ_search(publishingOrg = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = map_dbl(key,~rgbif::dataset_search(publishingOrg= .x,limit=0)$meta$count)) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(publishingOrg = .x))) %>% -mutate(Company = paste0("https://www.gbif.org/publisher/",key,"[",name,"]")) %>% -mutate(iso2 = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -glimpse() - -dd = ss %>% -dplyr::filter(pd == "dataset") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::datasets(uuid=.x,limit=1)$data$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ -rgbif::occ_search(datasetKey = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = 1) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(datasetKey = .x))) %>% -mutate(Company = paste0("(https://www.gbif.org/dataset/",key,")[",name,"]")) %>% -mutate(p_key = map_chr(key,~ rgbif::datasets(uuid=.x,limit=1)$data$publishingOrganizationKey)) %>% -mutate(iso2 = map_chr(p_key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -select(-p_key) %>% -glimpse() - -# combine tables and clean up -tt = rbind(pp,dd) %>% -arrange(name) %>% -select(Company, `Activity sector`, Country, Datasets, `Occurrence records`, `Data citations`) %>% -glimpse() - -# save .adoc -save_file_table = "250-private-sector-table.adoc" - -# save and more cleanup -sink(file = save_file_table, type = "output") -tt %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -# totals table -save_file_totals = "260-private-sector-totals.adoc" - -sink(file = save_file_totals, type = "output") -tt %>% -summarise( -Datasets = sum(Datasets), -`Occurrence records` = sum(`Occurrence records`), -`Data citations` = sum(`Data citations`) -) %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -} \ No newline at end of file diff --git a/.history/build-table-script/R/table_20240704101756.R b/.history/build-table-script/R/table_20240704101756.R deleted file mode 100644 index b0d62c1..0000000 --- a/.history/build-table-script/R/table_20240704101756.R +++ /dev/null @@ -1,89 +0,0 @@ - -library(dplyr) -library(purrr) -library(gbifmt) # my library -# setwd("C:/Users/ftw712/Desktop/doc-private-sector-data-publishing/") - -# harvest publishers and datasets from machineTags -list("dataset","organization") %>% -map(~ -get_mt("privateSector.gbif.org",type=.x,limit=500) %>% -mutate(link = paste0("https://www.gbif.org/publisher/",uuid)) %>% -mutate(pd=.x) -) %>% -bind_rows() %>% -glimpse() -# mutate() - - -ss = readr::read_csv("build-table-script/data/source.tsv") %>% -mutate(pd = ifelse(grepl("publisher",link),"publisher","dataset")) %>% -mutate(key = gsub("https://www.gbif.org/publisher/","",link)) %>% -mutate(key = gsub("https://www.gbif.org/dataset/","",key)) %>% -glimpse() - -if(FALSE) { -gbif_country = rgbif::enumeration_country() %>% select(Country=title,iso2) %>% glimpse() - -pp = ss %>% -dplyr::filter(pd == "publisher") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingOrganization)) %>% -mutate(`Occurrence records` = map_dbl(key,~ rgbif::occ_search(publishingOrg = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = map_dbl(key,~rgbif::dataset_search(publishingOrg= .x,limit=0)$meta$count)) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(publishingOrg = .x))) %>% -mutate(Company = paste0("https://www.gbif.org/publisher/",key,"[",name,"]")) %>% -mutate(iso2 = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -glimpse() - -dd = ss %>% -dplyr::filter(pd == "dataset") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::datasets(uuid=.x,limit=1)$data$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ -rgbif::occ_search(datasetKey = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = 1) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(datasetKey = .x))) %>% -mutate(Company = paste0("(https://www.gbif.org/dataset/",key,")[",name,"]")) %>% -mutate(p_key = map_chr(key,~ rgbif::datasets(uuid=.x,limit=1)$data$publishingOrganizationKey)) %>% -mutate(iso2 = map_chr(p_key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -select(-p_key) %>% -glimpse() - -# combine tables and clean up -tt = rbind(pp,dd) %>% -arrange(name) %>% -select(Company, `Activity sector`, Country, Datasets, `Occurrence records`, `Data citations`) %>% -glimpse() - -# save .adoc -save_file_table = "250-private-sector-table.adoc" - -# save and more cleanup -sink(file = save_file_table, type = "output") -tt %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -# totals table -save_file_totals = "260-private-sector-totals.adoc" - -sink(file = save_file_totals, type = "output") -tt %>% -summarise( -Datasets = sum(Datasets), -`Occurrence records` = sum(`Occurrence records`), -`Data citations` = sum(`Data citations`) -) %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -} \ No newline at end of file diff --git a/.history/build-table-script/R/table_20240704102106.R b/.history/build-table-script/R/table_20240704102106.R deleted file mode 100644 index db12f20..0000000 --- a/.history/build-table-script/R/table_20240704102106.R +++ /dev/null @@ -1,90 +0,0 @@ - -library(dplyr) -library(purrr) -library(gbifmt) # my library -# setwd("C:/Users/ftw712/Desktop/doc-private-sector-data-publishing/") - -# harvest publishers and datasets from machineTags -list("dataset","organization") %>% -map(~ -get_mt("privateSector.gbif.org",type=.x,limit=500) %>% -mutate(pd=.x) -) %>% -bind_rows() %>% -glimpse() - -# mutate(link = paste0("https://www.gbif.org/publisher/",uuid)) %>% -# mutate() - - -ss = readr::read_csv("build-table-script/data/source.tsv") %>% -mutate(pd = ifelse(grepl("publisher",link),"publisher","dataset")) %>% -mutate(key = gsub("https://www.gbif.org/publisher/","",link)) %>% -mutate(key = gsub("https://www.gbif.org/dataset/","",key)) %>% -glimpse() - -if(FALSE) { -gbif_country = rgbif::enumeration_country() %>% select(Country=title,iso2) %>% glimpse() - -pp = ss %>% -dplyr::filter(pd == "publisher") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingOrganization)) %>% -mutate(`Occurrence records` = map_dbl(key,~ rgbif::occ_search(publishingOrg = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = map_dbl(key,~rgbif::dataset_search(publishingOrg= .x,limit=0)$meta$count)) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(publishingOrg = .x))) %>% -mutate(Company = paste0("https://www.gbif.org/publisher/",key,"[",name,"]")) %>% -mutate(iso2 = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -glimpse() - -dd = ss %>% -dplyr::filter(pd == "dataset") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::datasets(uuid=.x,limit=1)$data$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ -rgbif::occ_search(datasetKey = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = 1) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(datasetKey = .x))) %>% -mutate(Company = paste0("(https://www.gbif.org/dataset/",key,")[",name,"]")) %>% -mutate(p_key = map_chr(key,~ rgbif::datasets(uuid=.x,limit=1)$data$publishingOrganizationKey)) %>% -mutate(iso2 = map_chr(p_key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -select(-p_key) %>% -glimpse() - -# combine tables and clean up -tt = rbind(pp,dd) %>% -arrange(name) %>% -select(Company, `Activity sector`, Country, Datasets, `Occurrence records`, `Data citations`) %>% -glimpse() - -# save .adoc -save_file_table = "250-private-sector-table.adoc" - -# save and more cleanup -sink(file = save_file_table, type = "output") -tt %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -# totals table -save_file_totals = "260-private-sector-totals.adoc" - -sink(file = save_file_totals, type = "output") -tt %>% -summarise( -Datasets = sum(Datasets), -`Occurrence records` = sum(`Occurrence records`), -`Data citations` = sum(`Data citations`) -) %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -} \ No newline at end of file diff --git a/.history/build-table-script/R/table_20240704102116.R b/.history/build-table-script/R/table_20240704102116.R deleted file mode 100644 index 7421ed6..0000000 --- a/.history/build-table-script/R/table_20240704102116.R +++ /dev/null @@ -1,90 +0,0 @@ - -library(dplyr) -library(purrr) -library(gbifmt) # my library -# setwd("C:/Users/ftw712/Desktop/doc-private-sector-data-publishing/") - -# harvest publishers and datasets from machineTags -list("dataset","organization") %>% -# map(~ -# get_mt("privateSector.gbif.org",type=.x,limit=500) %>% -# mutate(pd=.x) -# ) %>% -# bind_rows() %>% -# glimpse() - -# mutate(link = paste0("https://www.gbif.org/publisher/",uuid)) %>% -# mutate() - - -ss = readr::read_csv("build-table-script/data/source.tsv") %>% -mutate(pd = ifelse(grepl("publisher",link),"publisher","dataset")) %>% -mutate(key = gsub("https://www.gbif.org/publisher/","",link)) %>% -mutate(key = gsub("https://www.gbif.org/dataset/","",key)) %>% -glimpse() - -if(FALSE) { -gbif_country = rgbif::enumeration_country() %>% select(Country=title,iso2) %>% glimpse() - -pp = ss %>% -dplyr::filter(pd == "publisher") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingOrganization)) %>% -mutate(`Occurrence records` = map_dbl(key,~ rgbif::occ_search(publishingOrg = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = map_dbl(key,~rgbif::dataset_search(publishingOrg= .x,limit=0)$meta$count)) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(publishingOrg = .x))) %>% -mutate(Company = paste0("https://www.gbif.org/publisher/",key,"[",name,"]")) %>% -mutate(iso2 = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -glimpse() - -dd = ss %>% -dplyr::filter(pd == "dataset") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::datasets(uuid=.x,limit=1)$data$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ -rgbif::occ_search(datasetKey = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = 1) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(datasetKey = .x))) %>% -mutate(Company = paste0("(https://www.gbif.org/dataset/",key,")[",name,"]")) %>% -mutate(p_key = map_chr(key,~ rgbif::datasets(uuid=.x,limit=1)$data$publishingOrganizationKey)) %>% -mutate(iso2 = map_chr(p_key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -select(-p_key) %>% -glimpse() - -# combine tables and clean up -tt = rbind(pp,dd) %>% -arrange(name) %>% -select(Company, `Activity sector`, Country, Datasets, `Occurrence records`, `Data citations`) %>% -glimpse() - -# save .adoc -save_file_table = "250-private-sector-table.adoc" - -# save and more cleanup -sink(file = save_file_table, type = "output") -tt %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -# totals table -save_file_totals = "260-private-sector-totals.adoc" - -sink(file = save_file_totals, type = "output") -tt %>% -summarise( -Datasets = sum(Datasets), -`Occurrence records` = sum(`Occurrence records`), -`Data citations` = sum(`Data citations`) -) %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -} \ No newline at end of file diff --git a/.history/build-table-script/R/table_20240704102119.R b/.history/build-table-script/R/table_20240704102119.R deleted file mode 100644 index 757d7b5..0000000 --- a/.history/build-table-script/R/table_20240704102119.R +++ /dev/null @@ -1,90 +0,0 @@ - -library(dplyr) -library(purrr) -library(gbifmt) # my library -# setwd("C:/Users/ftw712/Desktop/doc-private-sector-data-publishing/") - -# harvest publishers and datasets from machineTags -# list("dataset","organization") %>% -# map(~ -# get_mt("privateSector.gbif.org",type=.x,limit=500) %>% -# mutate(pd=.x) -# ) %>% -# bind_rows() %>% -# glimpse() - -# mutate(link = paste0("https://www.gbif.org/publisher/",uuid)) %>% -# mutate() - - -ss = readr::read_csv("build-table-script/data/source.tsv") %>% -mutate(pd = ifelse(grepl("publisher",link),"publisher","dataset")) %>% -mutate(key = gsub("https://www.gbif.org/publisher/","",link)) %>% -mutate(key = gsub("https://www.gbif.org/dataset/","",key)) %>% -glimpse() - -if(FALSE) { -gbif_country = rgbif::enumeration_country() %>% select(Country=title,iso2) %>% glimpse() - -pp = ss %>% -dplyr::filter(pd == "publisher") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingOrganization)) %>% -mutate(`Occurrence records` = map_dbl(key,~ rgbif::occ_search(publishingOrg = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = map_dbl(key,~rgbif::dataset_search(publishingOrg= .x,limit=0)$meta$count)) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(publishingOrg = .x))) %>% -mutate(Company = paste0("https://www.gbif.org/publisher/",key,"[",name,"]")) %>% -mutate(iso2 = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -glimpse() - -dd = ss %>% -dplyr::filter(pd == "dataset") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::datasets(uuid=.x,limit=1)$data$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ -rgbif::occ_search(datasetKey = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = 1) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(datasetKey = .x))) %>% -mutate(Company = paste0("(https://www.gbif.org/dataset/",key,")[",name,"]")) %>% -mutate(p_key = map_chr(key,~ rgbif::datasets(uuid=.x,limit=1)$data$publishingOrganizationKey)) %>% -mutate(iso2 = map_chr(p_key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -select(-p_key) %>% -glimpse() - -# combine tables and clean up -tt = rbind(pp,dd) %>% -arrange(name) %>% -select(Company, `Activity sector`, Country, Datasets, `Occurrence records`, `Data citations`) %>% -glimpse() - -# save .adoc -save_file_table = "250-private-sector-table.adoc" - -# save and more cleanup -sink(file = save_file_table, type = "output") -tt %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -# totals table -save_file_totals = "260-private-sector-totals.adoc" - -sink(file = save_file_totals, type = "output") -tt %>% -summarise( -Datasets = sum(Datasets), -`Occurrence records` = sum(`Occurrence records`), -`Data citations` = sum(`Data citations`) -) %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -} \ No newline at end of file diff --git a/.history/build-table-script/R/table_20240704102126.R b/.history/build-table-script/R/table_20240704102126.R deleted file mode 100644 index f87fcde..0000000 --- a/.history/build-table-script/R/table_20240704102126.R +++ /dev/null @@ -1,90 +0,0 @@ - -library(dplyr) -library(purrr) -library(gbifmt) # my library -# setwd("C:/Users/ftw712/Desktop/doc-private-sector-data-publishing/") - -# harvest publishers and datasets from machineTags -# list("dataset","organization") %>% -# map(~ -get_mt("privateSector.gbif.org",type="dataset",limit=500) %>% -# mutate(pd=.x) -# ) %>% -# bind_rows() %>% -# glimpse() - -# mutate(link = paste0("https://www.gbif.org/publisher/",uuid)) %>% -# mutate() - - -ss = readr::read_csv("build-table-script/data/source.tsv") %>% -mutate(pd = ifelse(grepl("publisher",link),"publisher","dataset")) %>% -mutate(key = gsub("https://www.gbif.org/publisher/","",link)) %>% -mutate(key = gsub("https://www.gbif.org/dataset/","",key)) %>% -glimpse() - -if(FALSE) { -gbif_country = rgbif::enumeration_country() %>% select(Country=title,iso2) %>% glimpse() - -pp = ss %>% -dplyr::filter(pd == "publisher") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingOrganization)) %>% -mutate(`Occurrence records` = map_dbl(key,~ rgbif::occ_search(publishingOrg = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = map_dbl(key,~rgbif::dataset_search(publishingOrg= .x,limit=0)$meta$count)) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(publishingOrg = .x))) %>% -mutate(Company = paste0("https://www.gbif.org/publisher/",key,"[",name,"]")) %>% -mutate(iso2 = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -glimpse() - -dd = ss %>% -dplyr::filter(pd == "dataset") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::datasets(uuid=.x,limit=1)$data$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ -rgbif::occ_search(datasetKey = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = 1) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(datasetKey = .x))) %>% -mutate(Company = paste0("(https://www.gbif.org/dataset/",key,")[",name,"]")) %>% -mutate(p_key = map_chr(key,~ rgbif::datasets(uuid=.x,limit=1)$data$publishingOrganizationKey)) %>% -mutate(iso2 = map_chr(p_key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -select(-p_key) %>% -glimpse() - -# combine tables and clean up -tt = rbind(pp,dd) %>% -arrange(name) %>% -select(Company, `Activity sector`, Country, Datasets, `Occurrence records`, `Data citations`) %>% -glimpse() - -# save .adoc -save_file_table = "250-private-sector-table.adoc" - -# save and more cleanup -sink(file = save_file_table, type = "output") -tt %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -# totals table -save_file_totals = "260-private-sector-totals.adoc" - -sink(file = save_file_totals, type = "output") -tt %>% -summarise( -Datasets = sum(Datasets), -`Occurrence records` = sum(`Occurrence records`), -`Data citations` = sum(`Data citations`) -) %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -} \ No newline at end of file diff --git a/.history/build-table-script/R/table_20240704102131.R b/.history/build-table-script/R/table_20240704102131.R deleted file mode 100644 index 75f71d6..0000000 --- a/.history/build-table-script/R/table_20240704102131.R +++ /dev/null @@ -1,90 +0,0 @@ - -library(dplyr) -library(purrr) -library(gbifmt) # my library -# setwd("C:/Users/ftw712/Desktop/doc-private-sector-data-publishing/") - -# harvest publishers and datasets from machineTags -# list("dataset","organization") %>% -# map(~ -get_mt("privateSector.gbif.org",type="dataset",limit=500) -# mutate(pd=.x) -# ) %>% -# bind_rows() %>% -# glimpse() - -# mutate(link = paste0("https://www.gbif.org/publisher/",uuid)) %>% -# mutate() - - -ss = readr::read_csv("build-table-script/data/source.tsv") %>% -mutate(pd = ifelse(grepl("publisher",link),"publisher","dataset")) %>% -mutate(key = gsub("https://www.gbif.org/publisher/","",link)) %>% -mutate(key = gsub("https://www.gbif.org/dataset/","",key)) %>% -glimpse() - -if(FALSE) { -gbif_country = rgbif::enumeration_country() %>% select(Country=title,iso2) %>% glimpse() - -pp = ss %>% -dplyr::filter(pd == "publisher") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingOrganization)) %>% -mutate(`Occurrence records` = map_dbl(key,~ rgbif::occ_search(publishingOrg = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = map_dbl(key,~rgbif::dataset_search(publishingOrg= .x,limit=0)$meta$count)) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(publishingOrg = .x))) %>% -mutate(Company = paste0("https://www.gbif.org/publisher/",key,"[",name,"]")) %>% -mutate(iso2 = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -glimpse() - -dd = ss %>% -dplyr::filter(pd == "dataset") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::datasets(uuid=.x,limit=1)$data$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ -rgbif::occ_search(datasetKey = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = 1) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(datasetKey = .x))) %>% -mutate(Company = paste0("(https://www.gbif.org/dataset/",key,")[",name,"]")) %>% -mutate(p_key = map_chr(key,~ rgbif::datasets(uuid=.x,limit=1)$data$publishingOrganizationKey)) %>% -mutate(iso2 = map_chr(p_key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -select(-p_key) %>% -glimpse() - -# combine tables and clean up -tt = rbind(pp,dd) %>% -arrange(name) %>% -select(Company, `Activity sector`, Country, Datasets, `Occurrence records`, `Data citations`) %>% -glimpse() - -# save .adoc -save_file_table = "250-private-sector-table.adoc" - -# save and more cleanup -sink(file = save_file_table, type = "output") -tt %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -# totals table -save_file_totals = "260-private-sector-totals.adoc" - -sink(file = save_file_totals, type = "output") -tt %>% -summarise( -Datasets = sum(Datasets), -`Occurrence records` = sum(`Occurrence records`), -`Data citations` = sum(`Data citations`) -) %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -} \ No newline at end of file diff --git a/.history/build-table-script/R/table_20240704102134.R b/.history/build-table-script/R/table_20240704102134.R deleted file mode 100644 index 2a30134..0000000 --- a/.history/build-table-script/R/table_20240704102134.R +++ /dev/null @@ -1,92 +0,0 @@ - -library(dplyr) -library(purrr) -library(gbifmt) # my library -# setwd("C:/Users/ftw712/Desktop/doc-private-sector-data-publishing/") - -# harvest publishers and datasets from machineTags -get_mt("privateSector.gbif.org",type="dataset",limit=500) - - -# list("dataset","organization") %>% -# map(~ -# mutate(pd=.x) -# ) %>% -# bind_rows() %>% -# glimpse() - -# mutate(link = paste0("https://www.gbif.org/publisher/",uuid)) %>% -# mutate() - - -ss = readr::read_csv("build-table-script/data/source.tsv") %>% -mutate(pd = ifelse(grepl("publisher",link),"publisher","dataset")) %>% -mutate(key = gsub("https://www.gbif.org/publisher/","",link)) %>% -mutate(key = gsub("https://www.gbif.org/dataset/","",key)) %>% -glimpse() - -if(FALSE) { -gbif_country = rgbif::enumeration_country() %>% select(Country=title,iso2) %>% glimpse() - -pp = ss %>% -dplyr::filter(pd == "publisher") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingOrganization)) %>% -mutate(`Occurrence records` = map_dbl(key,~ rgbif::occ_search(publishingOrg = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = map_dbl(key,~rgbif::dataset_search(publishingOrg= .x,limit=0)$meta$count)) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(publishingOrg = .x))) %>% -mutate(Company = paste0("https://www.gbif.org/publisher/",key,"[",name,"]")) %>% -mutate(iso2 = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -glimpse() - -dd = ss %>% -dplyr::filter(pd == "dataset") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::datasets(uuid=.x,limit=1)$data$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ -rgbif::occ_search(datasetKey = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = 1) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(datasetKey = .x))) %>% -mutate(Company = paste0("(https://www.gbif.org/dataset/",key,")[",name,"]")) %>% -mutate(p_key = map_chr(key,~ rgbif::datasets(uuid=.x,limit=1)$data$publishingOrganizationKey)) %>% -mutate(iso2 = map_chr(p_key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -select(-p_key) %>% -glimpse() - -# combine tables and clean up -tt = rbind(pp,dd) %>% -arrange(name) %>% -select(Company, `Activity sector`, Country, Datasets, `Occurrence records`, `Data citations`) %>% -glimpse() - -# save .adoc -save_file_table = "250-private-sector-table.adoc" - -# save and more cleanup -sink(file = save_file_table, type = "output") -tt %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -# totals table -save_file_totals = "260-private-sector-totals.adoc" - -sink(file = save_file_totals, type = "output") -tt %>% -summarise( -Datasets = sum(Datasets), -`Occurrence records` = sum(`Occurrence records`), -`Data citations` = sum(`Data citations`) -) %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -} \ No newline at end of file diff --git a/.history/build-table-script/R/table_20240704102142.R b/.history/build-table-script/R/table_20240704102142.R deleted file mode 100644 index 8689065..0000000 --- a/.history/build-table-script/R/table_20240704102142.R +++ /dev/null @@ -1,93 +0,0 @@ - -library(dplyr) -library(purrr) -library(gbifmt) # my library -# setwd("C:/Users/ftw712/Desktop/doc-private-sector-data-publishing/") - -# harvest publishers and datasets from machineTags -get_mt("privateSector.gbif.org",type="dataset",limit=500) |> -glimpse() - - -# list("dataset","organization") %>% -# map(~ -# mutate(pd=.x) -# ) %>% -# bind_rows() %>% -# glimpse() - -# mutate(link = paste0("https://www.gbif.org/publisher/",uuid)) %>% -# mutate() - - -ss = readr::read_csv("build-table-script/data/source.tsv") %>% -mutate(pd = ifelse(grepl("publisher",link),"publisher","dataset")) %>% -mutate(key = gsub("https://www.gbif.org/publisher/","",link)) %>% -mutate(key = gsub("https://www.gbif.org/dataset/","",key)) %>% -glimpse() - -if(FALSE) { -gbif_country = rgbif::enumeration_country() %>% select(Country=title,iso2) %>% glimpse() - -pp = ss %>% -dplyr::filter(pd == "publisher") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingOrganization)) %>% -mutate(`Occurrence records` = map_dbl(key,~ rgbif::occ_search(publishingOrg = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = map_dbl(key,~rgbif::dataset_search(publishingOrg= .x,limit=0)$meta$count)) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(publishingOrg = .x))) %>% -mutate(Company = paste0("https://www.gbif.org/publisher/",key,"[",name,"]")) %>% -mutate(iso2 = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -glimpse() - -dd = ss %>% -dplyr::filter(pd == "dataset") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::datasets(uuid=.x,limit=1)$data$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ -rgbif::occ_search(datasetKey = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = 1) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(datasetKey = .x))) %>% -mutate(Company = paste0("(https://www.gbif.org/dataset/",key,")[",name,"]")) %>% -mutate(p_key = map_chr(key,~ rgbif::datasets(uuid=.x,limit=1)$data$publishingOrganizationKey)) %>% -mutate(iso2 = map_chr(p_key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -select(-p_key) %>% -glimpse() - -# combine tables and clean up -tt = rbind(pp,dd) %>% -arrange(name) %>% -select(Company, `Activity sector`, Country, Datasets, `Occurrence records`, `Data citations`) %>% -glimpse() - -# save .adoc -save_file_table = "250-private-sector-table.adoc" - -# save and more cleanup -sink(file = save_file_table, type = "output") -tt %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -# totals table -save_file_totals = "260-private-sector-totals.adoc" - -sink(file = save_file_totals, type = "output") -tt %>% -summarise( -Datasets = sum(Datasets), -`Occurrence records` = sum(`Occurrence records`), -`Data citations` = sum(`Data citations`) -) %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -} \ No newline at end of file diff --git a/.history/build-table-script/R/table_20240704102212.R b/.history/build-table-script/R/table_20240704102212.R deleted file mode 100644 index 3507d21..0000000 --- a/.history/build-table-script/R/table_20240704102212.R +++ /dev/null @@ -1,94 +0,0 @@ - -library(dplyr) -library(purrr) -library(gbifmt) # my library -# setwd("C:/Users/ftw712/Desktop/doc-private-sector-data-publishing/") - -# harvest publishers and datasets from machineTags -# dataset -ds_mt = get_mt("privateSector.gbif.org",type="dataset",limit=500) |> -glimpse() - - -# list("dataset","organization") %>% -# map(~ -# mutate(pd=.x) -# ) %>% -# bind_rows() %>% -# glimpse() - -# mutate(link = paste0("https://www.gbif.org/publisher/",uuid)) %>% -# mutate() - - -ss = readr::read_csv("build-table-script/data/source.tsv") %>% -mutate(pd = ifelse(grepl("publisher",link),"publisher","dataset")) %>% -mutate(key = gsub("https://www.gbif.org/publisher/","",link)) %>% -mutate(key = gsub("https://www.gbif.org/dataset/","",key)) %>% -glimpse() - -if(FALSE) { -gbif_country = rgbif::enumeration_country() %>% select(Country=title,iso2) %>% glimpse() - -pp = ss %>% -dplyr::filter(pd == "publisher") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingOrganization)) %>% -mutate(`Occurrence records` = map_dbl(key,~ rgbif::occ_search(publishingOrg = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = map_dbl(key,~rgbif::dataset_search(publishingOrg= .x,limit=0)$meta$count)) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(publishingOrg = .x))) %>% -mutate(Company = paste0("https://www.gbif.org/publisher/",key,"[",name,"]")) %>% -mutate(iso2 = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -glimpse() - -dd = ss %>% -dplyr::filter(pd == "dataset") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::datasets(uuid=.x,limit=1)$data$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ -rgbif::occ_search(datasetKey = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = 1) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(datasetKey = .x))) %>% -mutate(Company = paste0("(https://www.gbif.org/dataset/",key,")[",name,"]")) %>% -mutate(p_key = map_chr(key,~ rgbif::datasets(uuid=.x,limit=1)$data$publishingOrganizationKey)) %>% -mutate(iso2 = map_chr(p_key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -select(-p_key) %>% -glimpse() - -# combine tables and clean up -tt = rbind(pp,dd) %>% -arrange(name) %>% -select(Company, `Activity sector`, Country, Datasets, `Occurrence records`, `Data citations`) %>% -glimpse() - -# save .adoc -save_file_table = "250-private-sector-table.adoc" - -# save and more cleanup -sink(file = save_file_table, type = "output") -tt %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -# totals table -save_file_totals = "260-private-sector-totals.adoc" - -sink(file = save_file_totals, type = "output") -tt %>% -summarise( -Datasets = sum(Datasets), -`Occurrence records` = sum(`Occurrence records`), -`Data citations` = sum(`Data citations`) -) %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -} \ No newline at end of file diff --git a/.history/build-table-script/R/table_20240704102214.R b/.history/build-table-script/R/table_20240704102214.R deleted file mode 100644 index 3507d21..0000000 --- a/.history/build-table-script/R/table_20240704102214.R +++ /dev/null @@ -1,94 +0,0 @@ - -library(dplyr) -library(purrr) -library(gbifmt) # my library -# setwd("C:/Users/ftw712/Desktop/doc-private-sector-data-publishing/") - -# harvest publishers and datasets from machineTags -# dataset -ds_mt = get_mt("privateSector.gbif.org",type="dataset",limit=500) |> -glimpse() - - -# list("dataset","organization") %>% -# map(~ -# mutate(pd=.x) -# ) %>% -# bind_rows() %>% -# glimpse() - -# mutate(link = paste0("https://www.gbif.org/publisher/",uuid)) %>% -# mutate() - - -ss = readr::read_csv("build-table-script/data/source.tsv") %>% -mutate(pd = ifelse(grepl("publisher",link),"publisher","dataset")) %>% -mutate(key = gsub("https://www.gbif.org/publisher/","",link)) %>% -mutate(key = gsub("https://www.gbif.org/dataset/","",key)) %>% -glimpse() - -if(FALSE) { -gbif_country = rgbif::enumeration_country() %>% select(Country=title,iso2) %>% glimpse() - -pp = ss %>% -dplyr::filter(pd == "publisher") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingOrganization)) %>% -mutate(`Occurrence records` = map_dbl(key,~ rgbif::occ_search(publishingOrg = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = map_dbl(key,~rgbif::dataset_search(publishingOrg= .x,limit=0)$meta$count)) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(publishingOrg = .x))) %>% -mutate(Company = paste0("https://www.gbif.org/publisher/",key,"[",name,"]")) %>% -mutate(iso2 = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -glimpse() - -dd = ss %>% -dplyr::filter(pd == "dataset") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::datasets(uuid=.x,limit=1)$data$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ -rgbif::occ_search(datasetKey = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = 1) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(datasetKey = .x))) %>% -mutate(Company = paste0("(https://www.gbif.org/dataset/",key,")[",name,"]")) %>% -mutate(p_key = map_chr(key,~ rgbif::datasets(uuid=.x,limit=1)$data$publishingOrganizationKey)) %>% -mutate(iso2 = map_chr(p_key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -select(-p_key) %>% -glimpse() - -# combine tables and clean up -tt = rbind(pp,dd) %>% -arrange(name) %>% -select(Company, `Activity sector`, Country, Datasets, `Occurrence records`, `Data citations`) %>% -glimpse() - -# save .adoc -save_file_table = "250-private-sector-table.adoc" - -# save and more cleanup -sink(file = save_file_table, type = "output") -tt %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -# totals table -save_file_totals = "260-private-sector-totals.adoc" - -sink(file = save_file_totals, type = "output") -tt %>% -summarise( -Datasets = sum(Datasets), -`Occurrence records` = sum(`Occurrence records`), -`Data citations` = sum(`Data citations`) -) %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -} \ No newline at end of file diff --git a/.history/build-table-script/R/table_20240704102217.R b/.history/build-table-script/R/table_20240704102217.R deleted file mode 100644 index 3507d21..0000000 --- a/.history/build-table-script/R/table_20240704102217.R +++ /dev/null @@ -1,94 +0,0 @@ - -library(dplyr) -library(purrr) -library(gbifmt) # my library -# setwd("C:/Users/ftw712/Desktop/doc-private-sector-data-publishing/") - -# harvest publishers and datasets from machineTags -# dataset -ds_mt = get_mt("privateSector.gbif.org",type="dataset",limit=500) |> -glimpse() - - -# list("dataset","organization") %>% -# map(~ -# mutate(pd=.x) -# ) %>% -# bind_rows() %>% -# glimpse() - -# mutate(link = paste0("https://www.gbif.org/publisher/",uuid)) %>% -# mutate() - - -ss = readr::read_csv("build-table-script/data/source.tsv") %>% -mutate(pd = ifelse(grepl("publisher",link),"publisher","dataset")) %>% -mutate(key = gsub("https://www.gbif.org/publisher/","",link)) %>% -mutate(key = gsub("https://www.gbif.org/dataset/","",key)) %>% -glimpse() - -if(FALSE) { -gbif_country = rgbif::enumeration_country() %>% select(Country=title,iso2) %>% glimpse() - -pp = ss %>% -dplyr::filter(pd == "publisher") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingOrganization)) %>% -mutate(`Occurrence records` = map_dbl(key,~ rgbif::occ_search(publishingOrg = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = map_dbl(key,~rgbif::dataset_search(publishingOrg= .x,limit=0)$meta$count)) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(publishingOrg = .x))) %>% -mutate(Company = paste0("https://www.gbif.org/publisher/",key,"[",name,"]")) %>% -mutate(iso2 = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -glimpse() - -dd = ss %>% -dplyr::filter(pd == "dataset") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::datasets(uuid=.x,limit=1)$data$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ -rgbif::occ_search(datasetKey = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = 1) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(datasetKey = .x))) %>% -mutate(Company = paste0("(https://www.gbif.org/dataset/",key,")[",name,"]")) %>% -mutate(p_key = map_chr(key,~ rgbif::datasets(uuid=.x,limit=1)$data$publishingOrganizationKey)) %>% -mutate(iso2 = map_chr(p_key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -select(-p_key) %>% -glimpse() - -# combine tables and clean up -tt = rbind(pp,dd) %>% -arrange(name) %>% -select(Company, `Activity sector`, Country, Datasets, `Occurrence records`, `Data citations`) %>% -glimpse() - -# save .adoc -save_file_table = "250-private-sector-table.adoc" - -# save and more cleanup -sink(file = save_file_table, type = "output") -tt %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -# totals table -save_file_totals = "260-private-sector-totals.adoc" - -sink(file = save_file_totals, type = "output") -tt %>% -summarise( -Datasets = sum(Datasets), -`Occurrence records` = sum(`Occurrence records`), -`Data citations` = sum(`Data citations`) -) %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -} \ No newline at end of file diff --git a/.history/build-table-script/R/table_20240704102229.R b/.history/build-table-script/R/table_20240704102229.R deleted file mode 100644 index fa5dc59..0000000 --- a/.history/build-table-script/R/table_20240704102229.R +++ /dev/null @@ -1,94 +0,0 @@ - -library(dplyr) -library(purrr) -library(gbifmt) # my library -# setwd("C:/Users/ftw712/Desktop/doc-private-sector-data-publishing/") - -# harvest publishers and datasets from machineTags -# dataset -ds_mt = get_mt("privateSector.gbif.org",type="dataset",limit=500) |> -mutate(link = paste0("https://www.gbif.org/dataset/",uuid)) %>% -glimpse() - - -# list("dataset","organization") %>% -# map(~ -# mutate(pd=.x) -# ) %>% -# bind_rows() %>% -# glimpse() - -# mutate() - - -ss = readr::read_csv("build-table-script/data/source.tsv") %>% -mutate(pd = ifelse(grepl("publisher",link),"publisher","dataset")) %>% -mutate(key = gsub("https://www.gbif.org/publisher/","",link)) %>% -mutate(key = gsub("https://www.gbif.org/dataset/","",key)) %>% -glimpse() - -if(FALSE) { -gbif_country = rgbif::enumeration_country() %>% select(Country=title,iso2) %>% glimpse() - -pp = ss %>% -dplyr::filter(pd == "publisher") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingOrganization)) %>% -mutate(`Occurrence records` = map_dbl(key,~ rgbif::occ_search(publishingOrg = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = map_dbl(key,~rgbif::dataset_search(publishingOrg= .x,limit=0)$meta$count)) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(publishingOrg = .x))) %>% -mutate(Company = paste0("https://www.gbif.org/publisher/",key,"[",name,"]")) %>% -mutate(iso2 = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -glimpse() - -dd = ss %>% -dplyr::filter(pd == "dataset") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::datasets(uuid=.x,limit=1)$data$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ -rgbif::occ_search(datasetKey = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = 1) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(datasetKey = .x))) %>% -mutate(Company = paste0("(https://www.gbif.org/dataset/",key,")[",name,"]")) %>% -mutate(p_key = map_chr(key,~ rgbif::datasets(uuid=.x,limit=1)$data$publishingOrganizationKey)) %>% -mutate(iso2 = map_chr(p_key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -select(-p_key) %>% -glimpse() - -# combine tables and clean up -tt = rbind(pp,dd) %>% -arrange(name) %>% -select(Company, `Activity sector`, Country, Datasets, `Occurrence records`, `Data citations`) %>% -glimpse() - -# save .adoc -save_file_table = "250-private-sector-table.adoc" - -# save and more cleanup -sink(file = save_file_table, type = "output") -tt %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -# totals table -save_file_totals = "260-private-sector-totals.adoc" - -sink(file = save_file_totals, type = "output") -tt %>% -summarise( -Datasets = sum(Datasets), -`Occurrence records` = sum(`Occurrence records`), -`Data citations` = sum(`Data citations`) -) %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -} \ No newline at end of file diff --git a/.history/build-table-script/R/table_20240704102746.R b/.history/build-table-script/R/table_20240704102746.R deleted file mode 100644 index fa5dc59..0000000 --- a/.history/build-table-script/R/table_20240704102746.R +++ /dev/null @@ -1,94 +0,0 @@ - -library(dplyr) -library(purrr) -library(gbifmt) # my library -# setwd("C:/Users/ftw712/Desktop/doc-private-sector-data-publishing/") - -# harvest publishers and datasets from machineTags -# dataset -ds_mt = get_mt("privateSector.gbif.org",type="dataset",limit=500) |> -mutate(link = paste0("https://www.gbif.org/dataset/",uuid)) %>% -glimpse() - - -# list("dataset","organization") %>% -# map(~ -# mutate(pd=.x) -# ) %>% -# bind_rows() %>% -# glimpse() - -# mutate() - - -ss = readr::read_csv("build-table-script/data/source.tsv") %>% -mutate(pd = ifelse(grepl("publisher",link),"publisher","dataset")) %>% -mutate(key = gsub("https://www.gbif.org/publisher/","",link)) %>% -mutate(key = gsub("https://www.gbif.org/dataset/","",key)) %>% -glimpse() - -if(FALSE) { -gbif_country = rgbif::enumeration_country() %>% select(Country=title,iso2) %>% glimpse() - -pp = ss %>% -dplyr::filter(pd == "publisher") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingOrganization)) %>% -mutate(`Occurrence records` = map_dbl(key,~ rgbif::occ_search(publishingOrg = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = map_dbl(key,~rgbif::dataset_search(publishingOrg= .x,limit=0)$meta$count)) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(publishingOrg = .x))) %>% -mutate(Company = paste0("https://www.gbif.org/publisher/",key,"[",name,"]")) %>% -mutate(iso2 = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -glimpse() - -dd = ss %>% -dplyr::filter(pd == "dataset") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::datasets(uuid=.x,limit=1)$data$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ -rgbif::occ_search(datasetKey = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = 1) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(datasetKey = .x))) %>% -mutate(Company = paste0("(https://www.gbif.org/dataset/",key,")[",name,"]")) %>% -mutate(p_key = map_chr(key,~ rgbif::datasets(uuid=.x,limit=1)$data$publishingOrganizationKey)) %>% -mutate(iso2 = map_chr(p_key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -select(-p_key) %>% -glimpse() - -# combine tables and clean up -tt = rbind(pp,dd) %>% -arrange(name) %>% -select(Company, `Activity sector`, Country, Datasets, `Occurrence records`, `Data citations`) %>% -glimpse() - -# save .adoc -save_file_table = "250-private-sector-table.adoc" - -# save and more cleanup -sink(file = save_file_table, type = "output") -tt %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -# totals table -save_file_totals = "260-private-sector-totals.adoc" - -sink(file = save_file_totals, type = "output") -tt %>% -summarise( -Datasets = sum(Datasets), -`Occurrence records` = sum(`Occurrence records`), -`Data citations` = sum(`Data citations`) -) %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -} \ No newline at end of file diff --git a/.history/build-table-script/R/table_20240704102752.R b/.history/build-table-script/R/table_20240704102752.R deleted file mode 100644 index f79e9e0..0000000 --- a/.history/build-table-script/R/table_20240704102752.R +++ /dev/null @@ -1,95 +0,0 @@ - -library(dplyr) -library(purrr) -library(gbifmt) # my library -# setwd("C:/Users/ftw712/Desktop/doc-private-sector-data-publishing/") - -# harvest publishers and datasets from machineTags -# dataset -ds_mt = get_mt("privateSector.gbif.org",type="dataset",limit=500) |> -mutate(link = paste0("https://www.gbif.org/dataset/",uuid)) %>% - -glimpse() - - -# list("dataset","organization") %>% -# map(~ -# mutate(pd=.x) -# ) %>% -# bind_rows() %>% -# glimpse() - -# mutate() - - -ss = readr::read_csv("build-table-script/data/source.tsv") %>% -mutate(pd = ifelse(grepl("publisher",link),"publisher","dataset")) %>% -mutate(key = gsub("https://www.gbif.org/publisher/","",link)) %>% -mutate(key = gsub("https://www.gbif.org/dataset/","",key)) %>% -glimpse() - -if(FALSE) { -gbif_country = rgbif::enumeration_country() %>% select(Country=title,iso2) %>% glimpse() - -pp = ss %>% -dplyr::filter(pd == "publisher") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingOrganization)) %>% -mutate(`Occurrence records` = map_dbl(key,~ rgbif::occ_search(publishingOrg = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = map_dbl(key,~rgbif::dataset_search(publishingOrg= .x,limit=0)$meta$count)) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(publishingOrg = .x))) %>% -mutate(Company = paste0("https://www.gbif.org/publisher/",key,"[",name,"]")) %>% -mutate(iso2 = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -glimpse() - -dd = ss %>% -dplyr::filter(pd == "dataset") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::datasets(uuid=.x,limit=1)$data$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ -rgbif::occ_search(datasetKey = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = 1) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(datasetKey = .x))) %>% -mutate(Company = paste0("(https://www.gbif.org/dataset/",key,")[",name,"]")) %>% -mutate(p_key = map_chr(key,~ rgbif::datasets(uuid=.x,limit=1)$data$publishingOrganizationKey)) %>% -mutate(iso2 = map_chr(p_key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -select(-p_key) %>% -glimpse() - -# combine tables and clean up -tt = rbind(pp,dd) %>% -arrange(name) %>% -select(Company, `Activity sector`, Country, Datasets, `Occurrence records`, `Data citations`) %>% -glimpse() - -# save .adoc -save_file_table = "250-private-sector-table.adoc" - -# save and more cleanup -sink(file = save_file_table, type = "output") -tt %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -# totals table -save_file_totals = "260-private-sector-totals.adoc" - -sink(file = save_file_totals, type = "output") -tt %>% -summarise( -Datasets = sum(Datasets), -`Occurrence records` = sum(`Occurrence records`), -`Data citations` = sum(`Data citations`) -) %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -} \ No newline at end of file diff --git a/.history/build-table-script/R/table_20240704102957.R b/.history/build-table-script/R/table_20240704102957.R deleted file mode 100644 index c492065..0000000 --- a/.history/build-table-script/R/table_20240704102957.R +++ /dev/null @@ -1,95 +0,0 @@ - -library(dplyr) -library(purrr) -library(gbifmt) # my library -# setwd("C:/Users/ftw712/Desktop/doc-private-sector-data-publishing/") - -# harvest publishers and datasets from machineTags -# dataset -ds_mt = get_mt("privateSector.gbif.org",type="dataset",limit=500) |> -mutate(link = paste0("https://www.gbif.org/dataset/",uuid)) %>% -select(link, `Activity sector` = value, key = uuid, pd = "dataset") -glimpse() - - -# list("dataset","organization") %>% -# map(~ -# mutate(pd=.x) -# ) %>% -# bind_rows() %>% -# glimpse() - -# mutate() - - -ss = readr::read_csv("build-table-script/data/source.tsv") %>% -mutate(pd = ifelse(grepl("publisher",link),"publisher","dataset")) %>% -mutate(key = gsub("https://www.gbif.org/publisher/","",link)) %>% -mutate(key = gsub("https://www.gbif.org/dataset/","",key)) %>% -glimpse() - -if(FALSE) { -gbif_country = rgbif::enumeration_country() %>% select(Country=title,iso2) %>% glimpse() - -pp = ss %>% -dplyr::filter(pd == "publisher") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingOrganization)) %>% -mutate(`Occurrence records` = map_dbl(key,~ rgbif::occ_search(publishingOrg = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = map_dbl(key,~rgbif::dataset_search(publishingOrg= .x,limit=0)$meta$count)) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(publishingOrg = .x))) %>% -mutate(Company = paste0("https://www.gbif.org/publisher/",key,"[",name,"]")) %>% -mutate(iso2 = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -glimpse() - -dd = ss %>% -dplyr::filter(pd == "dataset") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::datasets(uuid=.x,limit=1)$data$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ -rgbif::occ_search(datasetKey = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = 1) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(datasetKey = .x))) %>% -mutate(Company = paste0("(https://www.gbif.org/dataset/",key,")[",name,"]")) %>% -mutate(p_key = map_chr(key,~ rgbif::datasets(uuid=.x,limit=1)$data$publishingOrganizationKey)) %>% -mutate(iso2 = map_chr(p_key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -select(-p_key) %>% -glimpse() - -# combine tables and clean up -tt = rbind(pp,dd) %>% -arrange(name) %>% -select(Company, `Activity sector`, Country, Datasets, `Occurrence records`, `Data citations`) %>% -glimpse() - -# save .adoc -save_file_table = "250-private-sector-table.adoc" - -# save and more cleanup -sink(file = save_file_table, type = "output") -tt %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -# totals table -save_file_totals = "260-private-sector-totals.adoc" - -sink(file = save_file_totals, type = "output") -tt %>% -summarise( -Datasets = sum(Datasets), -`Occurrence records` = sum(`Occurrence records`), -`Data citations` = sum(`Data citations`) -) %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -} \ No newline at end of file diff --git a/.history/build-table-script/R/table_20240704103001.R b/.history/build-table-script/R/table_20240704103001.R deleted file mode 100644 index 226050c..0000000 --- a/.history/build-table-script/R/table_20240704103001.R +++ /dev/null @@ -1,95 +0,0 @@ - -library(dplyr) -library(purrr) -library(gbifmt) # my library -# setwd("C:/Users/ftw712/Desktop/doc-private-sector-data-publishing/") - -# harvest publishers and datasets from machineTags -# dataset -ds_mt = get_mt("privateSector.gbif.org",type="dataset",limit=500) |> -mutate(link = paste0("https://www.gbif.org/dataset/",uuid)) %>% -select(link, `Activity sector` = value, key = uuid, pd = "dataset") |> -glimpse() - - -# list("dataset","organization") %>% -# map(~ -# mutate(pd=.x) -# ) %>% -# bind_rows() %>% -# glimpse() - -# mutate() - - -ss = readr::read_csv("build-table-script/data/source.tsv") %>% -mutate(pd = ifelse(grepl("publisher",link),"publisher","dataset")) %>% -mutate(key = gsub("https://www.gbif.org/publisher/","",link)) %>% -mutate(key = gsub("https://www.gbif.org/dataset/","",key)) %>% -glimpse() - -if(FALSE) { -gbif_country = rgbif::enumeration_country() %>% select(Country=title,iso2) %>% glimpse() - -pp = ss %>% -dplyr::filter(pd == "publisher") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingOrganization)) %>% -mutate(`Occurrence records` = map_dbl(key,~ rgbif::occ_search(publishingOrg = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = map_dbl(key,~rgbif::dataset_search(publishingOrg= .x,limit=0)$meta$count)) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(publishingOrg = .x))) %>% -mutate(Company = paste0("https://www.gbif.org/publisher/",key,"[",name,"]")) %>% -mutate(iso2 = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -glimpse() - -dd = ss %>% -dplyr::filter(pd == "dataset") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::datasets(uuid=.x,limit=1)$data$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ -rgbif::occ_search(datasetKey = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = 1) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(datasetKey = .x))) %>% -mutate(Company = paste0("(https://www.gbif.org/dataset/",key,")[",name,"]")) %>% -mutate(p_key = map_chr(key,~ rgbif::datasets(uuid=.x,limit=1)$data$publishingOrganizationKey)) %>% -mutate(iso2 = map_chr(p_key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -select(-p_key) %>% -glimpse() - -# combine tables and clean up -tt = rbind(pp,dd) %>% -arrange(name) %>% -select(Company, `Activity sector`, Country, Datasets, `Occurrence records`, `Data citations`) %>% -glimpse() - -# save .adoc -save_file_table = "250-private-sector-table.adoc" - -# save and more cleanup -sink(file = save_file_table, type = "output") -tt %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -# totals table -save_file_totals = "260-private-sector-totals.adoc" - -sink(file = save_file_totals, type = "output") -tt %>% -summarise( -Datasets = sum(Datasets), -`Occurrence records` = sum(`Occurrence records`), -`Data citations` = sum(`Data citations`) -) %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -} \ No newline at end of file diff --git a/.history/build-table-script/R/table_20240704103045.R b/.history/build-table-script/R/table_20240704103045.R deleted file mode 100644 index fc5a153..0000000 --- a/.history/build-table-script/R/table_20240704103045.R +++ /dev/null @@ -1,96 +0,0 @@ - -library(dplyr) -library(purrr) -library(gbifmt) # my library -# setwd("C:/Users/ftw712/Desktop/doc-private-sector-data-publishing/") - -# harvest publishers and datasets from machineTags -# dataset -ds_mt = get_mt("privateSector.gbif.org",type="dataset",limit=500) |> -mutate(link = paste0("https://www.gbif.org/dataset/",uuid)) %>% -mutate(pd = "dataset") |> -select(link, `Activity sector` = value, key = uuid) |> -glimpse() - - -# list("dataset","organization") %>% -# map(~ -# mutate(pd=.x) -# ) %>% -# bind_rows() %>% -# glimpse() - -# mutate() - - -ss = readr::read_csv("build-table-script/data/source.tsv") %>% -mutate(pd = ifelse(grepl("publisher",link),"publisher","dataset")) %>% -mutate(key = gsub("https://www.gbif.org/publisher/","",link)) %>% -mutate(key = gsub("https://www.gbif.org/dataset/","",key)) %>% -glimpse() - -if(FALSE) { -gbif_country = rgbif::enumeration_country() %>% select(Country=title,iso2) %>% glimpse() - -pp = ss %>% -dplyr::filter(pd == "publisher") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingOrganization)) %>% -mutate(`Occurrence records` = map_dbl(key,~ rgbif::occ_search(publishingOrg = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = map_dbl(key,~rgbif::dataset_search(publishingOrg= .x,limit=0)$meta$count)) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(publishingOrg = .x))) %>% -mutate(Company = paste0("https://www.gbif.org/publisher/",key,"[",name,"]")) %>% -mutate(iso2 = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -glimpse() - -dd = ss %>% -dplyr::filter(pd == "dataset") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::datasets(uuid=.x,limit=1)$data$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ -rgbif::occ_search(datasetKey = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = 1) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(datasetKey = .x))) %>% -mutate(Company = paste0("(https://www.gbif.org/dataset/",key,")[",name,"]")) %>% -mutate(p_key = map_chr(key,~ rgbif::datasets(uuid=.x,limit=1)$data$publishingOrganizationKey)) %>% -mutate(iso2 = map_chr(p_key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -select(-p_key) %>% -glimpse() - -# combine tables and clean up -tt = rbind(pp,dd) %>% -arrange(name) %>% -select(Company, `Activity sector`, Country, Datasets, `Occurrence records`, `Data citations`) %>% -glimpse() - -# save .adoc -save_file_table = "250-private-sector-table.adoc" - -# save and more cleanup -sink(file = save_file_table, type = "output") -tt %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -# totals table -save_file_totals = "260-private-sector-totals.adoc" - -sink(file = save_file_totals, type = "output") -tt %>% -summarise( -Datasets = sum(Datasets), -`Occurrence records` = sum(`Occurrence records`), -`Data citations` = sum(`Data citations`) -) %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -} \ No newline at end of file diff --git a/.history/build-table-script/R/table_20240704103144.R b/.history/build-table-script/R/table_20240704103144.R deleted file mode 100644 index 1383b36..0000000 --- a/.history/build-table-script/R/table_20240704103144.R +++ /dev/null @@ -1,96 +0,0 @@ - -library(dplyr) -library(purrr) -library(gbifmt) # my library -# setwd("C:/Users/ftw712/Desktop/doc-private-sector-data-publishing/") - -# harvest publishers and datasets from machineTags -# dataset -ds_mt = get_mt("privateSector.gbif.org",type="dataset",limit=500) |> -mutate(link = paste0("https://www.gbif.org/dataset/",uuid)) %>% -mutate(pd = "dataset") |> -select(link, `Activity sector` = value, key = uuid,pd) |> -glimpse() - - -# list("dataset","organization") %>% -# map(~ -# mutate(pd=.x) -# ) %>% -# bind_rows() %>% -# glimpse() - -# mutate() - - -ss = readr::read_csv("build-table-script/data/source.tsv") %>% -mutate(pd = ifelse(grepl("publisher",link),"publisher","dataset")) %>% -mutate(key = gsub("https://www.gbif.org/publisher/","",link)) %>% -mutate(key = gsub("https://www.gbif.org/dataset/","",key)) %>% -glimpse() - -if(FALSE) { -gbif_country = rgbif::enumeration_country() %>% select(Country=title,iso2) %>% glimpse() - -pp = ss %>% -dplyr::filter(pd == "publisher") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingOrganization)) %>% -mutate(`Occurrence records` = map_dbl(key,~ rgbif::occ_search(publishingOrg = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = map_dbl(key,~rgbif::dataset_search(publishingOrg= .x,limit=0)$meta$count)) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(publishingOrg = .x))) %>% -mutate(Company = paste0("https://www.gbif.org/publisher/",key,"[",name,"]")) %>% -mutate(iso2 = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -glimpse() - -dd = ss %>% -dplyr::filter(pd == "dataset") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::datasets(uuid=.x,limit=1)$data$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ -rgbif::occ_search(datasetKey = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = 1) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(datasetKey = .x))) %>% -mutate(Company = paste0("(https://www.gbif.org/dataset/",key,")[",name,"]")) %>% -mutate(p_key = map_chr(key,~ rgbif::datasets(uuid=.x,limit=1)$data$publishingOrganizationKey)) %>% -mutate(iso2 = map_chr(p_key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -select(-p_key) %>% -glimpse() - -# combine tables and clean up -tt = rbind(pp,dd) %>% -arrange(name) %>% -select(Company, `Activity sector`, Country, Datasets, `Occurrence records`, `Data citations`) %>% -glimpse() - -# save .adoc -save_file_table = "250-private-sector-table.adoc" - -# save and more cleanup -sink(file = save_file_table, type = "output") -tt %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -# totals table -save_file_totals = "260-private-sector-totals.adoc" - -sink(file = save_file_totals, type = "output") -tt %>% -summarise( -Datasets = sum(Datasets), -`Occurrence records` = sum(`Occurrence records`), -`Data citations` = sum(`Data citations`) -) %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -} \ No newline at end of file diff --git a/.history/build-table-script/R/table_20240704103155.R b/.history/build-table-script/R/table_20240704103155.R deleted file mode 100644 index c17db63..0000000 --- a/.history/build-table-script/R/table_20240704103155.R +++ /dev/null @@ -1,96 +0,0 @@ - -library(dplyr) -library(purrr) -library(gbifmt) # my library -# setwd("C:/Users/ftw712/Desktop/doc-private-sector-data-publishing/") - -# harvest publishers and datasets from machineTags -# dataset -ds_mt = get_mt("privateSector.gbif.org",type="dataset",limit=500) |> -mutate(link = paste0("https://www.gbif.org/dataset/",uuid)) %>% -mutate(pd = "dataset") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - - -# list("dataset","organization") %>% -# map(~ -# mutate(pd=.x) -# ) %>% -# bind_rows() %>% -# glimpse() - -# mutate() - - -ss = readr::read_csv("build-table-script/data/source.tsv") %>% -mutate(pd = ifelse(grepl("publisher",link),"publisher","dataset")) %>% -mutate(key = gsub("https://www.gbif.org/publisher/","",link)) %>% -mutate(key = gsub("https://www.gbif.org/dataset/","",key)) %>% -glimpse() - -if(FALSE) { -gbif_country = rgbif::enumeration_country() %>% select(Country=title,iso2) %>% glimpse() - -pp = ss %>% -dplyr::filter(pd == "publisher") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingOrganization)) %>% -mutate(`Occurrence records` = map_dbl(key,~ rgbif::occ_search(publishingOrg = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = map_dbl(key,~rgbif::dataset_search(publishingOrg= .x,limit=0)$meta$count)) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(publishingOrg = .x))) %>% -mutate(Company = paste0("https://www.gbif.org/publisher/",key,"[",name,"]")) %>% -mutate(iso2 = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -glimpse() - -dd = ss %>% -dplyr::filter(pd == "dataset") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::datasets(uuid=.x,limit=1)$data$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ -rgbif::occ_search(datasetKey = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = 1) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(datasetKey = .x))) %>% -mutate(Company = paste0("(https://www.gbif.org/dataset/",key,")[",name,"]")) %>% -mutate(p_key = map_chr(key,~ rgbif::datasets(uuid=.x,limit=1)$data$publishingOrganizationKey)) %>% -mutate(iso2 = map_chr(p_key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -select(-p_key) %>% -glimpse() - -# combine tables and clean up -tt = rbind(pp,dd) %>% -arrange(name) %>% -select(Company, `Activity sector`, Country, Datasets, `Occurrence records`, `Data citations`) %>% -glimpse() - -# save .adoc -save_file_table = "250-private-sector-table.adoc" - -# save and more cleanup -sink(file = save_file_table, type = "output") -tt %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -# totals table -save_file_totals = "260-private-sector-totals.adoc" - -sink(file = save_file_totals, type = "output") -tt %>% -summarise( -Datasets = sum(Datasets), -`Occurrence records` = sum(`Occurrence records`), -`Data citations` = sum(`Data citations`) -) %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -} \ No newline at end of file diff --git a/.history/build-table-script/R/table_20240704103541.R b/.history/build-table-script/R/table_20240704103541.R deleted file mode 100644 index 2c1c1f0..0000000 --- a/.history/build-table-script/R/table_20240704103541.R +++ /dev/null @@ -1,86 +0,0 @@ - -library(dplyr) -library(purrr) -library(gbifmt) # my library -# setwd("C:/Users/ftw712/Desktop/doc-private-sector-data-publishing/") - -# harvest publishers and datasets from machineTags -# dataset -ds_mt = get_mt("privateSector.gbif.org",type="dataset",limit=500) |> -mutate(link = paste0("https://www.gbif.org/dataset/",uuid)) %>% -mutate(pd = "dataset") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - - -ss = readr::read_csv("build-table-script/data/source.tsv") %>% -mutate(pd = ifelse(grepl("publisher",link),"publisher","dataset")) %>% -mutate(key = gsub("https://www.gbif.org/publisher/","",link)) %>% -mutate(key = gsub("https://www.gbif.org/dataset/","",key)) %>% -glimpse() - -if(FALSE) { -gbif_country = rgbif::enumeration_country() %>% select(Country=title,iso2) %>% glimpse() - -pp = ss %>% -dplyr::filter(pd == "publisher") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingOrganization)) %>% -mutate(`Occurrence records` = map_dbl(key,~ rgbif::occ_search(publishingOrg = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = map_dbl(key,~rgbif::dataset_search(publishingOrg= .x,limit=0)$meta$count)) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(publishingOrg = .x))) %>% -mutate(Company = paste0("https://www.gbif.org/publisher/",key,"[",name,"]")) %>% -mutate(iso2 = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -glimpse() - -dd = ss %>% -dplyr::filter(pd == "dataset") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::datasets(uuid=.x,limit=1)$data$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ -rgbif::occ_search(datasetKey = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = 1) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(datasetKey = .x))) %>% -mutate(Company = paste0("(https://www.gbif.org/dataset/",key,")[",name,"]")) %>% -mutate(p_key = map_chr(key,~ rgbif::datasets(uuid=.x,limit=1)$data$publishingOrganizationKey)) %>% -mutate(iso2 = map_chr(p_key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -select(-p_key) %>% -glimpse() - -# combine tables and clean up -tt = rbind(pp,dd) %>% -arrange(name) %>% -select(Company, `Activity sector`, Country, Datasets, `Occurrence records`, `Data citations`) %>% -glimpse() - -# save .adoc -save_file_table = "250-private-sector-table.adoc" - -# save and more cleanup -sink(file = save_file_table, type = "output") -tt %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -# totals table -save_file_totals = "260-private-sector-totals.adoc" - -sink(file = save_file_totals, type = "output") -tt %>% -summarise( -Datasets = sum(Datasets), -`Occurrence records` = sum(`Occurrence records`), -`Data citations` = sum(`Data citations`) -) %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -} \ No newline at end of file diff --git a/.history/build-table-script/R/table_20240704103554.R b/.history/build-table-script/R/table_20240704103554.R deleted file mode 100644 index 78290ab..0000000 --- a/.history/build-table-script/R/table_20240704103554.R +++ /dev/null @@ -1,92 +0,0 @@ - -library(dplyr) -library(purrr) -library(gbifmt) # my library -# setwd("C:/Users/ftw712/Desktop/doc-private-sector-data-publishing/") - -# harvest publishers and datasets from machineTags -# dataset -ds_mt = get_mt("privateSector.gbif.org",type="dataset",limit=500) |> -mutate(link = paste0("https://www.gbif.org/dataset/",uuid)) %>% -mutate(pd = "dataset") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -ds_mt = get_mt("privateSector.gbif.org",type="organization",limit=500) |> -mutate(link = paste0("https://www.gbif.org/dataset/",uuid)) %>% -mutate(pd = "dataset") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - - -ss = readr::read_csv("build-table-script/data/source.tsv") %>% -mutate(pd = ifelse(grepl("publisher",link),"publisher","dataset")) %>% -mutate(key = gsub("https://www.gbif.org/publisher/","",link)) %>% -mutate(key = gsub("https://www.gbif.org/dataset/","",key)) %>% -glimpse() - -if(FALSE) { -gbif_country = rgbif::enumeration_country() %>% select(Country=title,iso2) %>% glimpse() - -pp = ss %>% -dplyr::filter(pd == "publisher") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingOrganization)) %>% -mutate(`Occurrence records` = map_dbl(key,~ rgbif::occ_search(publishingOrg = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = map_dbl(key,~rgbif::dataset_search(publishingOrg= .x,limit=0)$meta$count)) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(publishingOrg = .x))) %>% -mutate(Company = paste0("https://www.gbif.org/publisher/",key,"[",name,"]")) %>% -mutate(iso2 = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -glimpse() - -dd = ss %>% -dplyr::filter(pd == "dataset") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::datasets(uuid=.x,limit=1)$data$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ -rgbif::occ_search(datasetKey = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = 1) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(datasetKey = .x))) %>% -mutate(Company = paste0("(https://www.gbif.org/dataset/",key,")[",name,"]")) %>% -mutate(p_key = map_chr(key,~ rgbif::datasets(uuid=.x,limit=1)$data$publishingOrganizationKey)) %>% -mutate(iso2 = map_chr(p_key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -select(-p_key) %>% -glimpse() - -# combine tables and clean up -tt = rbind(pp,dd) %>% -arrange(name) %>% -select(Company, `Activity sector`, Country, Datasets, `Occurrence records`, `Data citations`) %>% -glimpse() - -# save .adoc -save_file_table = "250-private-sector-table.adoc" - -# save and more cleanup -sink(file = save_file_table, type = "output") -tt %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -# totals table -save_file_totals = "260-private-sector-totals.adoc" - -sink(file = save_file_totals, type = "output") -tt %>% -summarise( -Datasets = sum(Datasets), -`Occurrence records` = sum(`Occurrence records`), -`Data citations` = sum(`Data citations`) -) %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -} \ No newline at end of file diff --git a/.history/build-table-script/R/table_20240704103618.R b/.history/build-table-script/R/table_20240704103618.R deleted file mode 100644 index 193839e..0000000 --- a/.history/build-table-script/R/table_20240704103618.R +++ /dev/null @@ -1,92 +0,0 @@ - -library(dplyr) -library(purrr) -library(gbifmt) # my library -# setwd("C:/Users/ftw712/Desktop/doc-private-sector-data-publishing/") - -# harvest publishers and datasets from machineTags -# dataset -ds_mt = get_mt("privateSector.gbif.org",type="dataset",limit=500) |> -mutate(link = paste0("https://www.gbif.org/dataset/",uuid)) %>% -mutate(pd = "dataset") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -ds_mt = get_mt("privateSector.gbif.org",type="organization",limit=500) |> -mutate(link = paste0("https://www.gbif.org/publisher/",uuid)) %>% -mutate(pd = "publisher") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - - -ss = readr::read_csv("build-table-script/data/source.tsv") %>% -mutate(pd = ifelse(grepl("publisher",link),"publisher","dataset")) %>% -mutate(key = gsub("https://www.gbif.org/publisher/","",link)) %>% -mutate(key = gsub("https://www.gbif.org/dataset/","",key)) %>% -glimpse() - -if(FALSE) { -gbif_country = rgbif::enumeration_country() %>% select(Country=title,iso2) %>% glimpse() - -pp = ss %>% -dplyr::filter(pd == "publisher") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingOrganization)) %>% -mutate(`Occurrence records` = map_dbl(key,~ rgbif::occ_search(publishingOrg = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = map_dbl(key,~rgbif::dataset_search(publishingOrg= .x,limit=0)$meta$count)) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(publishingOrg = .x))) %>% -mutate(Company = paste0("https://www.gbif.org/publisher/",key,"[",name,"]")) %>% -mutate(iso2 = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -glimpse() - -dd = ss %>% -dplyr::filter(pd == "dataset") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::datasets(uuid=.x,limit=1)$data$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ -rgbif::occ_search(datasetKey = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = 1) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(datasetKey = .x))) %>% -mutate(Company = paste0("(https://www.gbif.org/dataset/",key,")[",name,"]")) %>% -mutate(p_key = map_chr(key,~ rgbif::datasets(uuid=.x,limit=1)$data$publishingOrganizationKey)) %>% -mutate(iso2 = map_chr(p_key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -select(-p_key) %>% -glimpse() - -# combine tables and clean up -tt = rbind(pp,dd) %>% -arrange(name) %>% -select(Company, `Activity sector`, Country, Datasets, `Occurrence records`, `Data citations`) %>% -glimpse() - -# save .adoc -save_file_table = "250-private-sector-table.adoc" - -# save and more cleanup -sink(file = save_file_table, type = "output") -tt %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -# totals table -save_file_totals = "260-private-sector-totals.adoc" - -sink(file = save_file_totals, type = "output") -tt %>% -summarise( -Datasets = sum(Datasets), -`Occurrence records` = sum(`Occurrence records`), -`Data citations` = sum(`Data citations`) -) %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -} \ No newline at end of file diff --git a/.history/build-table-script/R/table_20240704103624.R b/.history/build-table-script/R/table_20240704103624.R deleted file mode 100644 index 4a034a9..0000000 --- a/.history/build-table-script/R/table_20240704103624.R +++ /dev/null @@ -1,92 +0,0 @@ - -library(dplyr) -library(purrr) -library(gbifmt) # my library -# setwd("C:/Users/ftw712/Desktop/doc-private-sector-data-publishing/") - -# harvest publishers and datasets from machineTags -# dataset -ds_mt = get_mt("privateSector.gbif.org",type="dataset",limit=500) |> -mutate(link = paste0("https://www.gbif.org/dataset/",uuid)) %>% -mutate(pd = "dataset") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -pb_mt = get_mt("privateSector.gbif.org",type="organization",limit=500) |> -mutate(link = paste0("https://www.gbif.org/publisher/",uuid)) %>% -mutate(pd = "publisher") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - - -ss = readr::read_csv("build-table-script/data/source.tsv") %>% -mutate(pd = ifelse(grepl("publisher",link),"publisher","dataset")) %>% -mutate(key = gsub("https://www.gbif.org/publisher/","",link)) %>% -mutate(key = gsub("https://www.gbif.org/dataset/","",key)) %>% -glimpse() - -if(FALSE) { -gbif_country = rgbif::enumeration_country() %>% select(Country=title,iso2) %>% glimpse() - -pp = ss %>% -dplyr::filter(pd == "publisher") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingOrganization)) %>% -mutate(`Occurrence records` = map_dbl(key,~ rgbif::occ_search(publishingOrg = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = map_dbl(key,~rgbif::dataset_search(publishingOrg= .x,limit=0)$meta$count)) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(publishingOrg = .x))) %>% -mutate(Company = paste0("https://www.gbif.org/publisher/",key,"[",name,"]")) %>% -mutate(iso2 = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -glimpse() - -dd = ss %>% -dplyr::filter(pd == "dataset") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::datasets(uuid=.x,limit=1)$data$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ -rgbif::occ_search(datasetKey = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = 1) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(datasetKey = .x))) %>% -mutate(Company = paste0("(https://www.gbif.org/dataset/",key,")[",name,"]")) %>% -mutate(p_key = map_chr(key,~ rgbif::datasets(uuid=.x,limit=1)$data$publishingOrganizationKey)) %>% -mutate(iso2 = map_chr(p_key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -select(-p_key) %>% -glimpse() - -# combine tables and clean up -tt = rbind(pp,dd) %>% -arrange(name) %>% -select(Company, `Activity sector`, Country, Datasets, `Occurrence records`, `Data citations`) %>% -glimpse() - -# save .adoc -save_file_table = "250-private-sector-table.adoc" - -# save and more cleanup -sink(file = save_file_table, type = "output") -tt %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -# totals table -save_file_totals = "260-private-sector-totals.adoc" - -sink(file = save_file_totals, type = "output") -tt %>% -summarise( -Datasets = sum(Datasets), -`Occurrence records` = sum(`Occurrence records`), -`Data citations` = sum(`Data citations`) -) %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -} \ No newline at end of file diff --git a/.history/build-table-script/R/table_20240704103627.R b/.history/build-table-script/R/table_20240704103627.R deleted file mode 100644 index 6ea2cd0..0000000 --- a/.history/build-table-script/R/table_20240704103627.R +++ /dev/null @@ -1,93 +0,0 @@ - -library(dplyr) -library(purrr) -library(gbifmt) # my library -# setwd("C:/Users/ftw712/Desktop/doc-private-sector-data-publishing/") - -# harvest publishers and datasets from machineTags -# dataset -ds_mt = get_mt("privateSector.gbif.org",type="dataset",limit=500) |> -mutate(link = paste0("https://www.gbif.org/dataset/",uuid)) %>% -mutate(pd = "dataset") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -pb_mt = get_mt("privateSector.gbif.org",type="organization",limit=500) |> -mutate(link = paste0("https://www.gbif.org/publisher/",uuid)) %>% -mutate(pd = "publisher") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - - - -ss = readr::read_csv("build-table-script/data/source.tsv") %>% -mutate(pd = ifelse(grepl("publisher",link),"publisher","dataset")) %>% -mutate(key = gsub("https://www.gbif.org/publisher/","",link)) %>% -mutate(key = gsub("https://www.gbif.org/dataset/","",key)) %>% -glimpse() - -if(FALSE) { -gbif_country = rgbif::enumeration_country() %>% select(Country=title,iso2) %>% glimpse() - -pp = ss %>% -dplyr::filter(pd == "publisher") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingOrganization)) %>% -mutate(`Occurrence records` = map_dbl(key,~ rgbif::occ_search(publishingOrg = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = map_dbl(key,~rgbif::dataset_search(publishingOrg= .x,limit=0)$meta$count)) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(publishingOrg = .x))) %>% -mutate(Company = paste0("https://www.gbif.org/publisher/",key,"[",name,"]")) %>% -mutate(iso2 = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -glimpse() - -dd = ss %>% -dplyr::filter(pd == "dataset") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::datasets(uuid=.x,limit=1)$data$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ -rgbif::occ_search(datasetKey = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = 1) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(datasetKey = .x))) %>% -mutate(Company = paste0("(https://www.gbif.org/dataset/",key,")[",name,"]")) %>% -mutate(p_key = map_chr(key,~ rgbif::datasets(uuid=.x,limit=1)$data$publishingOrganizationKey)) %>% -mutate(iso2 = map_chr(p_key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -select(-p_key) %>% -glimpse() - -# combine tables and clean up -tt = rbind(pp,dd) %>% -arrange(name) %>% -select(Company, `Activity sector`, Country, Datasets, `Occurrence records`, `Data citations`) %>% -glimpse() - -# save .adoc -save_file_table = "250-private-sector-table.adoc" - -# save and more cleanup -sink(file = save_file_table, type = "output") -tt %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -# totals table -save_file_totals = "260-private-sector-totals.adoc" - -sink(file = save_file_totals, type = "output") -tt %>% -summarise( -Datasets = sum(Datasets), -`Occurrence records` = sum(`Occurrence records`), -`Data citations` = sum(`Data citations`) -) %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -} \ No newline at end of file diff --git a/.history/build-table-script/R/table_20240704103830.R b/.history/build-table-script/R/table_20240704103830.R deleted file mode 100644 index 5f0ea76..0000000 --- a/.history/build-table-script/R/table_20240704103830.R +++ /dev/null @@ -1,94 +0,0 @@ - -library(dplyr) -library(purrr) -library(gbifmt) # my library -# setwd("C:/Users/ftw712/Desktop/doc-private-sector-data-publishing/") - -# harvest publishers and datasets from machineTags -# dataset -ds_mt = get_mt("privateSector.gbif.org",type="dataset",limit=500) |> -mutate(link = paste0("https://www.gbif.org/dataset/",uuid)) %>% -mutate(pd = "dataset") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -pb_mt = get_mt("privateSector.gbif.org",type="organization",limit=500) |> -mutate(link = paste0("https://www.gbif.org/publisher/",uuid)) %>% -mutate(pd = "publisher") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -ss = list(ds_mt,pb_mt) |> -bind_rows() - -ss = readr::read_csv("build-table-script/data/source.tsv") %>% -mutate(pd = ifelse(grepl("publisher",link),"publisher","dataset")) %>% -mutate(key = gsub("https://www.gbif.org/publisher/","",link)) %>% -mutate(key = gsub("https://www.gbif.org/dataset/","",key)) %>% -glimpse() - -if(FALSE) { -gbif_country = rgbif::enumeration_country() %>% select(Country=title,iso2) %>% glimpse() - -pp = ss %>% -dplyr::filter(pd == "publisher") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingOrganization)) %>% -mutate(`Occurrence records` = map_dbl(key,~ rgbif::occ_search(publishingOrg = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = map_dbl(key,~rgbif::dataset_search(publishingOrg= .x,limit=0)$meta$count)) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(publishingOrg = .x))) %>% -mutate(Company = paste0("https://www.gbif.org/publisher/",key,"[",name,"]")) %>% -mutate(iso2 = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -glimpse() - -dd = ss %>% -dplyr::filter(pd == "dataset") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::datasets(uuid=.x,limit=1)$data$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ -rgbif::occ_search(datasetKey = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = 1) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(datasetKey = .x))) %>% -mutate(Company = paste0("(https://www.gbif.org/dataset/",key,")[",name,"]")) %>% -mutate(p_key = map_chr(key,~ rgbif::datasets(uuid=.x,limit=1)$data$publishingOrganizationKey)) %>% -mutate(iso2 = map_chr(p_key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -select(-p_key) %>% -glimpse() - -# combine tables and clean up -tt = rbind(pp,dd) %>% -arrange(name) %>% -select(Company, `Activity sector`, Country, Datasets, `Occurrence records`, `Data citations`) %>% -glimpse() - -# save .adoc -save_file_table = "250-private-sector-table.adoc" - -# save and more cleanup -sink(file = save_file_table, type = "output") -tt %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -# totals table -save_file_totals = "260-private-sector-totals.adoc" - -sink(file = save_file_totals, type = "output") -tt %>% -summarise( -Datasets = sum(Datasets), -`Occurrence records` = sum(`Occurrence records`), -`Data citations` = sum(`Data citations`) -) %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -} \ No newline at end of file diff --git a/.history/build-table-script/R/table_20240704103834.R b/.history/build-table-script/R/table_20240704103834.R deleted file mode 100644 index 6937d14..0000000 --- a/.history/build-table-script/R/table_20240704103834.R +++ /dev/null @@ -1,94 +0,0 @@ - -library(dplyr) -library(purrr) -library(gbifmt) # my library -# setwd("C:/Users/ftw712/Desktop/doc-private-sector-data-publishing/") - -# harvest publishers and datasets from machineTags -# dataset -ds_mt = get_mt("privateSector.gbif.org",type="dataset",limit=500) |> -mutate(link = paste0("https://www.gbif.org/dataset/",uuid)) %>% -mutate(pd = "dataset") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -pb_mt = get_mt("privateSector.gbif.org",type="organization",limit=500) |> -mutate(link = paste0("https://www.gbif.org/publisher/",uuid)) %>% -mutate(pd = "publisher") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -ss = list(ds_mt,pb_mt) |> -bind_rows() - -# ss = readr::read_csv("build-table-script/data/source.tsv") %>% -# mutate(pd = ifelse(grepl("publisher",link),"publisher","dataset")) %>% -# mutate(key = gsub("https://www.gbif.org/publisher/","",link)) %>% -# mutate(key = gsub("https://www.gbif.org/dataset/","",key)) %>% -# glimpse() - -if(FALSE) { -gbif_country = rgbif::enumeration_country() %>% select(Country=title,iso2) %>% glimpse() - -pp = ss %>% -dplyr::filter(pd == "publisher") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingOrganization)) %>% -mutate(`Occurrence records` = map_dbl(key,~ rgbif::occ_search(publishingOrg = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = map_dbl(key,~rgbif::dataset_search(publishingOrg= .x,limit=0)$meta$count)) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(publishingOrg = .x))) %>% -mutate(Company = paste0("https://www.gbif.org/publisher/",key,"[",name,"]")) %>% -mutate(iso2 = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -glimpse() - -dd = ss %>% -dplyr::filter(pd == "dataset") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::datasets(uuid=.x,limit=1)$data$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ -rgbif::occ_search(datasetKey = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = 1) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(datasetKey = .x))) %>% -mutate(Company = paste0("(https://www.gbif.org/dataset/",key,")[",name,"]")) %>% -mutate(p_key = map_chr(key,~ rgbif::datasets(uuid=.x,limit=1)$data$publishingOrganizationKey)) %>% -mutate(iso2 = map_chr(p_key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -select(-p_key) %>% -glimpse() - -# combine tables and clean up -tt = rbind(pp,dd) %>% -arrange(name) %>% -select(Company, `Activity sector`, Country, Datasets, `Occurrence records`, `Data citations`) %>% -glimpse() - -# save .adoc -save_file_table = "250-private-sector-table.adoc" - -# save and more cleanup -sink(file = save_file_table, type = "output") -tt %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -# totals table -save_file_totals = "260-private-sector-totals.adoc" - -sink(file = save_file_totals, type = "output") -tt %>% -summarise( -Datasets = sum(Datasets), -`Occurrence records` = sum(`Occurrence records`), -`Data citations` = sum(`Data citations`) -) %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -} \ No newline at end of file diff --git a/.history/build-table-script/R/table_20240704103835.R b/.history/build-table-script/R/table_20240704103835.R deleted file mode 100644 index 6937d14..0000000 --- a/.history/build-table-script/R/table_20240704103835.R +++ /dev/null @@ -1,94 +0,0 @@ - -library(dplyr) -library(purrr) -library(gbifmt) # my library -# setwd("C:/Users/ftw712/Desktop/doc-private-sector-data-publishing/") - -# harvest publishers and datasets from machineTags -# dataset -ds_mt = get_mt("privateSector.gbif.org",type="dataset",limit=500) |> -mutate(link = paste0("https://www.gbif.org/dataset/",uuid)) %>% -mutate(pd = "dataset") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -pb_mt = get_mt("privateSector.gbif.org",type="organization",limit=500) |> -mutate(link = paste0("https://www.gbif.org/publisher/",uuid)) %>% -mutate(pd = "publisher") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -ss = list(ds_mt,pb_mt) |> -bind_rows() - -# ss = readr::read_csv("build-table-script/data/source.tsv") %>% -# mutate(pd = ifelse(grepl("publisher",link),"publisher","dataset")) %>% -# mutate(key = gsub("https://www.gbif.org/publisher/","",link)) %>% -# mutate(key = gsub("https://www.gbif.org/dataset/","",key)) %>% -# glimpse() - -if(FALSE) { -gbif_country = rgbif::enumeration_country() %>% select(Country=title,iso2) %>% glimpse() - -pp = ss %>% -dplyr::filter(pd == "publisher") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingOrganization)) %>% -mutate(`Occurrence records` = map_dbl(key,~ rgbif::occ_search(publishingOrg = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = map_dbl(key,~rgbif::dataset_search(publishingOrg= .x,limit=0)$meta$count)) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(publishingOrg = .x))) %>% -mutate(Company = paste0("https://www.gbif.org/publisher/",key,"[",name,"]")) %>% -mutate(iso2 = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -glimpse() - -dd = ss %>% -dplyr::filter(pd == "dataset") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::datasets(uuid=.x,limit=1)$data$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ -rgbif::occ_search(datasetKey = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = 1) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(datasetKey = .x))) %>% -mutate(Company = paste0("(https://www.gbif.org/dataset/",key,")[",name,"]")) %>% -mutate(p_key = map_chr(key,~ rgbif::datasets(uuid=.x,limit=1)$data$publishingOrganizationKey)) %>% -mutate(iso2 = map_chr(p_key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -select(-p_key) %>% -glimpse() - -# combine tables and clean up -tt = rbind(pp,dd) %>% -arrange(name) %>% -select(Company, `Activity sector`, Country, Datasets, `Occurrence records`, `Data citations`) %>% -glimpse() - -# save .adoc -save_file_table = "250-private-sector-table.adoc" - -# save and more cleanup -sink(file = save_file_table, type = "output") -tt %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -# totals table -save_file_totals = "260-private-sector-totals.adoc" - -sink(file = save_file_totals, type = "output") -tt %>% -summarise( -Datasets = sum(Datasets), -`Occurrence records` = sum(`Occurrence records`), -`Data citations` = sum(`Data citations`) -) %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -} \ No newline at end of file diff --git a/.history/build-table-script/R/table_20240704103850.R b/.history/build-table-script/R/table_20240704103850.R deleted file mode 100644 index 99c0d28..0000000 --- a/.history/build-table-script/R/table_20240704103850.R +++ /dev/null @@ -1,95 +0,0 @@ - -library(dplyr) -library(purrr) -library(gbifmt) # my library -# setwd("C:/Users/ftw712/Desktop/doc-private-sector-data-publishing/") - -# harvest publishers and datasets from machineTags -# dataset -ds_mt = get_mt("privateSector.gbif.org",type="dataset",limit=500) |> -mutate(link = paste0("https://www.gbif.org/dataset/",uuid)) %>% -mutate(pd = "dataset") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -pb_mt = get_mt("privateSector.gbif.org",type="organization",limit=500) |> -mutate(link = paste0("https://www.gbif.org/publisher/",uuid)) %>% -mutate(pd = "publisher") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -ss = list(ds_mt,pb_mt) |> -bind_rows() |> -glimpse() - -# ss = readr::read_csv("build-table-script/data/source.tsv") %>% -# mutate(pd = ifelse(grepl("publisher",link),"publisher","dataset")) %>% -# mutate(key = gsub("https://www.gbif.org/publisher/","",link)) %>% -# mutate(key = gsub("https://www.gbif.org/dataset/","",key)) %>% -# glimpse() - -if(FALSE) { -gbif_country = rgbif::enumeration_country() %>% select(Country=title,iso2) %>% glimpse() - -pp = ss %>% -dplyr::filter(pd == "publisher") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingOrganization)) %>% -mutate(`Occurrence records` = map_dbl(key,~ rgbif::occ_search(publishingOrg = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = map_dbl(key,~rgbif::dataset_search(publishingOrg= .x,limit=0)$meta$count)) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(publishingOrg = .x))) %>% -mutate(Company = paste0("https://www.gbif.org/publisher/",key,"[",name,"]")) %>% -mutate(iso2 = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -glimpse() - -dd = ss %>% -dplyr::filter(pd == "dataset") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::datasets(uuid=.x,limit=1)$data$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ -rgbif::occ_search(datasetKey = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = 1) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(datasetKey = .x))) %>% -mutate(Company = paste0("(https://www.gbif.org/dataset/",key,")[",name,"]")) %>% -mutate(p_key = map_chr(key,~ rgbif::datasets(uuid=.x,limit=1)$data$publishingOrganizationKey)) %>% -mutate(iso2 = map_chr(p_key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -select(-p_key) %>% -glimpse() - -# combine tables and clean up -tt = rbind(pp,dd) %>% -arrange(name) %>% -select(Company, `Activity sector`, Country, Datasets, `Occurrence records`, `Data citations`) %>% -glimpse() - -# save .adoc -save_file_table = "250-private-sector-table.adoc" - -# save and more cleanup -sink(file = save_file_table, type = "output") -tt %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -# totals table -save_file_totals = "260-private-sector-totals.adoc" - -sink(file = save_file_totals, type = "output") -tt %>% -summarise( -Datasets = sum(Datasets), -`Occurrence records` = sum(`Occurrence records`), -`Data citations` = sum(`Data citations`) -) %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -} \ No newline at end of file diff --git a/.history/build-table-script/R/table_20240704103908.R b/.history/build-table-script/R/table_20240704103908.R deleted file mode 100644 index 6cd5957..0000000 --- a/.history/build-table-script/R/table_20240704103908.R +++ /dev/null @@ -1,95 +0,0 @@ - -library(dplyr) -library(purrr) -library(gbifmt) # my library -# setwd("C:/Users/ftw712/Desktop/doc-private-sector-data-publishing/") - -# harvest publishers and datasets from machineTags -# dataset -ds_mt = get_mt("privateSector.gbif.org",type="dataset",limit=500) |> -mutate(link = paste0("https://www.gbif.org/dataset/",uuid)) %>% -mutate(pd = "dataset") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -pb_mt = get_mt("privateSector.gbif.org",type="organization",limit=500) |> -mutate(link = paste0("https://www.gbif.org/publisher/",uuid)) %>% -mutate(pd = "publisher") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -ss = list(ds_mt,pb_mt) |> -bind_rows() |> -glimpse() - -# ss = readr::read_csv("build-table-script/data/source.tsv") %>% -# mutate(pd = ifelse(grepl("publisher",link),"publisher","dataset")) %>% -# mutate(key = gsub("https://www.gbif.org/publisher/","",link)) %>% -# mutate(key = gsub("https://www.gbif.org/dataset/","",key)) %>% -# glimpse() - -gbif_country = rgbif::enumeration_country() %>% select(Country=title,iso2) %>% glimpse() - -pp = ss %>% -dplyr::filter(pd == "publisher") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingOrganization)) %>% -mutate(`Occurrence records` = map_dbl(key,~ rgbif::occ_search(publishingOrg = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = map_dbl(key,~rgbif::dataset_search(publishingOrg= .x,limit=0)$meta$count)) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(publishingOrg = .x))) %>% -mutate(Company = paste0("https://www.gbif.org/publisher/",key,"[",name,"]")) %>% -mutate(iso2 = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -glimpse() - -if(FALSE) { -dd = ss %>% -dplyr::filter(pd == "dataset") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::datasets(uuid=.x,limit=1)$data$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ -rgbif::occ_search(datasetKey = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = 1) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(datasetKey = .x))) %>% -mutate(Company = paste0("(https://www.gbif.org/dataset/",key,")[",name,"]")) %>% -mutate(p_key = map_chr(key,~ rgbif::datasets(uuid=.x,limit=1)$data$publishingOrganizationKey)) %>% -mutate(iso2 = map_chr(p_key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -select(-p_key) %>% -glimpse() - -# combine tables and clean up -tt = rbind(pp,dd) %>% -arrange(name) %>% -select(Company, `Activity sector`, Country, Datasets, `Occurrence records`, `Data citations`) %>% -glimpse() - -# save .adoc -save_file_table = "250-private-sector-table.adoc" - -# save and more cleanup -sink(file = save_file_table, type = "output") -tt %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -# totals table -save_file_totals = "260-private-sector-totals.adoc" - -sink(file = save_file_totals, type = "output") -tt %>% -summarise( -Datasets = sum(Datasets), -`Occurrence records` = sum(`Occurrence records`), -`Data citations` = sum(`Data citations`) -) %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -} \ No newline at end of file diff --git a/.history/build-table-script/R/table_20240704104421.R b/.history/build-table-script/R/table_20240704104421.R deleted file mode 100644 index e4fb9a5..0000000 --- a/.history/build-table-script/R/table_20240704104421.R +++ /dev/null @@ -1,96 +0,0 @@ - -library(dplyr) -library(purrr) -library(gbifmt) # my library -# setwd("C:/Users/ftw712/Desktop/doc-private-sector-data-publishing/") - -# harvest publishers and datasets from machineTags -# dataset -ds_mt = get_mt("privateSector.gbif.org",type="dataset",limit=500) |> -mutate(link = paste0("https://www.gbif.org/dataset/",uuid)) %>% -mutate(pd = "dataset") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -pb_mt = get_mt("privateSector.gbif.org",type="organization",limit=500) |> -mutate(link = paste0("https://www.gbif.org/publisher/",uuid)) %>% -mutate(pd = "publisher") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -ss = list(ds_mt,pb_mt) |> -bind_rows() |> -glimpse() - -# ss = readr::read_csv("build-table-script/data/source.tsv") %>% -# mutate(pd = ifelse(grepl("publisher",link),"publisher","dataset")) %>% -# mutate(key = gsub("https://www.gbif.org/publisher/","",link)) %>% -# mutate(key = gsub("https://www.gbif.org/dataset/","",key)) %>% -# glimpse() - -gbif_country = rgbif::enumeration_country() %>% select(Country=title,iso2) %>% glimpse() - -pp = ss %>% -dplyr::filter(pd == "publisher") %>% -glimpse() -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingOrganization)) %>% -mutate(`Occurrence records` = map_dbl(key,~ rgbif::occ_search(publishingOrg = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = map_dbl(key,~rgbif::dataset_search(publishingOrg= .x,limit=0)$meta$count)) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(publishingOrg = .x))) %>% -mutate(Company = paste0("https://www.gbif.org/publisher/",key,"[",name,"]")) %>% -mutate(iso2 = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -glimpse() - -if(FALSE) { -dd = ss %>% -dplyr::filter(pd == "dataset") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::datasets(uuid=.x,limit=1)$data$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ -rgbif::occ_search(datasetKey = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = 1) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(datasetKey = .x))) %>% -mutate(Company = paste0("(https://www.gbif.org/dataset/",key,")[",name,"]")) %>% -mutate(p_key = map_chr(key,~ rgbif::datasets(uuid=.x,limit=1)$data$publishingOrganizationKey)) %>% -mutate(iso2 = map_chr(p_key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -select(-p_key) %>% -glimpse() - -# combine tables and clean up -tt = rbind(pp,dd) %>% -arrange(name) %>% -select(Company, `Activity sector`, Country, Datasets, `Occurrence records`, `Data citations`) %>% -glimpse() - -# save .adoc -save_file_table = "250-private-sector-table.adoc" - -# save and more cleanup -sink(file = save_file_table, type = "output") -tt %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -# totals table -save_file_totals = "260-private-sector-totals.adoc" - -sink(file = save_file_totals, type = "output") -tt %>% -summarise( -Datasets = sum(Datasets), -`Occurrence records` = sum(`Occurrence records`), -`Data citations` = sum(`Data citations`) -) %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -} \ No newline at end of file diff --git a/.history/build-table-script/R/table_20240704104427.R b/.history/build-table-script/R/table_20240704104427.R deleted file mode 100644 index a73e5f9..0000000 --- a/.history/build-table-script/R/table_20240704104427.R +++ /dev/null @@ -1,97 +0,0 @@ - -library(dplyr) -library(purrr) -library(gbifmt) # my library -# setwd("C:/Users/ftw712/Desktop/doc-private-sector-data-publishing/") - -# harvest publishers and datasets from machineTags -# dataset -ds_mt = get_mt("privateSector.gbif.org",type="dataset",limit=500) |> -mutate(link = paste0("https://www.gbif.org/dataset/",uuid)) %>% -mutate(pd = "dataset") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -pb_mt = get_mt("privateSector.gbif.org",type="organization",limit=500) |> -mutate(link = paste0("https://www.gbif.org/publisher/",uuid)) %>% -mutate(pd = "publisher") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -ss = list(ds_mt,pb_mt) |> -bind_rows() |> -glimpse() - -# ss = readr::read_csv("build-table-script/data/source.tsv") %>% -# mutate(pd = ifelse(grepl("publisher",link),"publisher","dataset")) %>% -# mutate(key = gsub("https://www.gbif.org/publisher/","",link)) %>% -# mutate(key = gsub("https://www.gbif.org/dataset/","",key)) %>% -# glimpse() - -gbif_country = rgbif::enumeration_country() %>% select(Country=title,iso2) %>% glimpse() - -pp = ss %>% -dplyr::filter(pd == "publisher") %>% -glimpse() - -# select(key,`Activity sector`) %>% -# mutate(name = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingOrganization)) %>% -# mutate(`Occurrence records` = map_dbl(key,~ rgbif::occ_search(publishingOrg = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -# mutate(Datasets = map_dbl(key,~rgbif::dataset_search(publishingOrg= .x,limit=0)$meta$count)) %>% -# mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(publishingOrg = .x))) %>% -# mutate(Company = paste0("https://www.gbif.org/publisher/",key,"[",name,"]")) %>% -# mutate(iso2 = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -# merge(gbif_country,by="iso2") %>% -# glimpse() - -if(FALSE) { -dd = ss %>% -dplyr::filter(pd == "dataset") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::datasets(uuid=.x,limit=1)$data$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ -rgbif::occ_search(datasetKey = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = 1) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(datasetKey = .x))) %>% -mutate(Company = paste0("(https://www.gbif.org/dataset/",key,")[",name,"]")) %>% -mutate(p_key = map_chr(key,~ rgbif::datasets(uuid=.x,limit=1)$data$publishingOrganizationKey)) %>% -mutate(iso2 = map_chr(p_key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -select(-p_key) %>% -glimpse() - -# combine tables and clean up -tt = rbind(pp,dd) %>% -arrange(name) %>% -select(Company, `Activity sector`, Country, Datasets, `Occurrence records`, `Data citations`) %>% -glimpse() - -# save .adoc -save_file_table = "250-private-sector-table.adoc" - -# save and more cleanup -sink(file = save_file_table, type = "output") -tt %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -# totals table -save_file_totals = "260-private-sector-totals.adoc" - -sink(file = save_file_totals, type = "output") -tt %>% -summarise( -Datasets = sum(Datasets), -`Occurrence records` = sum(`Occurrence records`), -`Data citations` = sum(`Data citations`) -) %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -} \ No newline at end of file diff --git a/.history/build-table-script/R/table_20240704104428.R b/.history/build-table-script/R/table_20240704104428.R deleted file mode 100644 index a73e5f9..0000000 --- a/.history/build-table-script/R/table_20240704104428.R +++ /dev/null @@ -1,97 +0,0 @@ - -library(dplyr) -library(purrr) -library(gbifmt) # my library -# setwd("C:/Users/ftw712/Desktop/doc-private-sector-data-publishing/") - -# harvest publishers and datasets from machineTags -# dataset -ds_mt = get_mt("privateSector.gbif.org",type="dataset",limit=500) |> -mutate(link = paste0("https://www.gbif.org/dataset/",uuid)) %>% -mutate(pd = "dataset") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -pb_mt = get_mt("privateSector.gbif.org",type="organization",limit=500) |> -mutate(link = paste0("https://www.gbif.org/publisher/",uuid)) %>% -mutate(pd = "publisher") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -ss = list(ds_mt,pb_mt) |> -bind_rows() |> -glimpse() - -# ss = readr::read_csv("build-table-script/data/source.tsv") %>% -# mutate(pd = ifelse(grepl("publisher",link),"publisher","dataset")) %>% -# mutate(key = gsub("https://www.gbif.org/publisher/","",link)) %>% -# mutate(key = gsub("https://www.gbif.org/dataset/","",key)) %>% -# glimpse() - -gbif_country = rgbif::enumeration_country() %>% select(Country=title,iso2) %>% glimpse() - -pp = ss %>% -dplyr::filter(pd == "publisher") %>% -glimpse() - -# select(key,`Activity sector`) %>% -# mutate(name = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingOrganization)) %>% -# mutate(`Occurrence records` = map_dbl(key,~ rgbif::occ_search(publishingOrg = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -# mutate(Datasets = map_dbl(key,~rgbif::dataset_search(publishingOrg= .x,limit=0)$meta$count)) %>% -# mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(publishingOrg = .x))) %>% -# mutate(Company = paste0("https://www.gbif.org/publisher/",key,"[",name,"]")) %>% -# mutate(iso2 = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -# merge(gbif_country,by="iso2") %>% -# glimpse() - -if(FALSE) { -dd = ss %>% -dplyr::filter(pd == "dataset") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::datasets(uuid=.x,limit=1)$data$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ -rgbif::occ_search(datasetKey = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = 1) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(datasetKey = .x))) %>% -mutate(Company = paste0("(https://www.gbif.org/dataset/",key,")[",name,"]")) %>% -mutate(p_key = map_chr(key,~ rgbif::datasets(uuid=.x,limit=1)$data$publishingOrganizationKey)) %>% -mutate(iso2 = map_chr(p_key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -select(-p_key) %>% -glimpse() - -# combine tables and clean up -tt = rbind(pp,dd) %>% -arrange(name) %>% -select(Company, `Activity sector`, Country, Datasets, `Occurrence records`, `Data citations`) %>% -glimpse() - -# save .adoc -save_file_table = "250-private-sector-table.adoc" - -# save and more cleanup -sink(file = save_file_table, type = "output") -tt %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -# totals table -save_file_totals = "260-private-sector-totals.adoc" - -sink(file = save_file_totals, type = "output") -tt %>% -summarise( -Datasets = sum(Datasets), -`Occurrence records` = sum(`Occurrence records`), -`Data citations` = sum(`Data citations`) -) %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -} \ No newline at end of file diff --git a/.history/build-table-script/R/table_20240704104452.R b/.history/build-table-script/R/table_20240704104452.R deleted file mode 100644 index 8fe8b96..0000000 --- a/.history/build-table-script/R/table_20240704104452.R +++ /dev/null @@ -1,97 +0,0 @@ - -library(dplyr) -library(purrr) -library(gbifmt) # my library -# setwd("C:/Users/ftw712/Desktop/doc-private-sector-data-publishing/") - -# harvest publishers and datasets from directly from machineTags -# dataset -ds_mt = get_mt("privateSector.gbif.org",type="dataset",limit=500) |> -mutate(link = paste0("https://www.gbif.org/dataset/",uuid)) %>% -mutate(pd = "dataset") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -pb_mt = get_mt("privateSector.gbif.org",type="organization",limit=500) |> -mutate(link = paste0("https://www.gbif.org/publisher/",uuid)) %>% -mutate(pd = "publisher") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -ss = list(ds_mt,pb_mt) |> -bind_rows() |> -glimpse() - -# ss = readr::read_csv("build-table-script/data/source.tsv") %>% -# mutate(pd = ifelse(grepl("publisher",link),"publisher","dataset")) %>% -# mutate(key = gsub("https://www.gbif.org/publisher/","",link)) %>% -# mutate(key = gsub("https://www.gbif.org/dataset/","",key)) %>% -# glimpse() - -gbif_country = rgbif::enumeration_country() %>% select(Country=title,iso2) %>% glimpse() - -pp = ss %>% -dplyr::filter(pd == "publisher") %>% -glimpse() - -# select(key,`Activity sector`) %>% -# mutate(name = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingOrganization)) %>% -# mutate(`Occurrence records` = map_dbl(key,~ rgbif::occ_search(publishingOrg = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -# mutate(Datasets = map_dbl(key,~rgbif::dataset_search(publishingOrg= .x,limit=0)$meta$count)) %>% -# mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(publishingOrg = .x))) %>% -# mutate(Company = paste0("https://www.gbif.org/publisher/",key,"[",name,"]")) %>% -# mutate(iso2 = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -# merge(gbif_country,by="iso2") %>% -# glimpse() - -if(FALSE) { -dd = ss %>% -dplyr::filter(pd == "dataset") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::datasets(uuid=.x,limit=1)$data$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ -rgbif::occ_search(datasetKey = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = 1) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(datasetKey = .x))) %>% -mutate(Company = paste0("(https://www.gbif.org/dataset/",key,")[",name,"]")) %>% -mutate(p_key = map_chr(key,~ rgbif::datasets(uuid=.x,limit=1)$data$publishingOrganizationKey)) %>% -mutate(iso2 = map_chr(p_key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -select(-p_key) %>% -glimpse() - -# combine tables and clean up -tt = rbind(pp,dd) %>% -arrange(name) %>% -select(Company, `Activity sector`, Country, Datasets, `Occurrence records`, `Data citations`) %>% -glimpse() - -# save .adoc -save_file_table = "250-private-sector-table.adoc" - -# save and more cleanup -sink(file = save_file_table, type = "output") -tt %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -# totals table -save_file_totals = "260-private-sector-totals.adoc" - -sink(file = save_file_totals, type = "output") -tt %>% -summarise( -Datasets = sum(Datasets), -`Occurrence records` = sum(`Occurrence records`), -`Data citations` = sum(`Data citations`) -) %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -} \ No newline at end of file diff --git a/.history/build-table-script/R/table_20240704104500.R b/.history/build-table-script/R/table_20240704104500.R deleted file mode 100644 index 5215811..0000000 --- a/.history/build-table-script/R/table_20240704104500.R +++ /dev/null @@ -1,99 +0,0 @@ - -library(dplyr) -library(purrr) -library(gbifmt) # my library -# setwd("C:/Users/ftw712/Desktop/doc-private-sector-data-publishing/") - -# harvest publishers and datasets from directly from machineTags - -# dataset -ds_mt = get_mt("privateSector.gbif.org",type="dataset",limit=500) |> -mutate(link = paste0("https://www.gbif.org/dataset/",uuid)) %>% -mutate(pd = "dataset") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -# publisher -pb_mt = get_mt("privateSector.gbif.org",type="organization",limit=500) |> -mutate(link = paste0("https://www.gbif.org/publisher/",uuid)) %>% -mutate(pd = "publisher") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -ss = list(ds_mt,pb_mt) |> -bind_rows() |> -glimpse() - -# ss = readr::read_csv("build-table-script/data/source.tsv") %>% -# mutate(pd = ifelse(grepl("publisher",link),"publisher","dataset")) %>% -# mutate(key = gsub("https://www.gbif.org/publisher/","",link)) %>% -# mutate(key = gsub("https://www.gbif.org/dataset/","",key)) %>% -# glimpse() - -gbif_country = rgbif::enumeration_country() %>% select(Country=title,iso2) %>% glimpse() - -pp = ss %>% -dplyr::filter(pd == "publisher") %>% -glimpse() - -# select(key,`Activity sector`) %>% -# mutate(name = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingOrganization)) %>% -# mutate(`Occurrence records` = map_dbl(key,~ rgbif::occ_search(publishingOrg = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -# mutate(Datasets = map_dbl(key,~rgbif::dataset_search(publishingOrg= .x,limit=0)$meta$count)) %>% -# mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(publishingOrg = .x))) %>% -# mutate(Company = paste0("https://www.gbif.org/publisher/",key,"[",name,"]")) %>% -# mutate(iso2 = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -# merge(gbif_country,by="iso2") %>% -# glimpse() - -if(FALSE) { -dd = ss %>% -dplyr::filter(pd == "dataset") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::datasets(uuid=.x,limit=1)$data$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ -rgbif::occ_search(datasetKey = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = 1) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(datasetKey = .x))) %>% -mutate(Company = paste0("(https://www.gbif.org/dataset/",key,")[",name,"]")) %>% -mutate(p_key = map_chr(key,~ rgbif::datasets(uuid=.x,limit=1)$data$publishingOrganizationKey)) %>% -mutate(iso2 = map_chr(p_key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -select(-p_key) %>% -glimpse() - -# combine tables and clean up -tt = rbind(pp,dd) %>% -arrange(name) %>% -select(Company, `Activity sector`, Country, Datasets, `Occurrence records`, `Data citations`) %>% -glimpse() - -# save .adoc -save_file_table = "250-private-sector-table.adoc" - -# save and more cleanup -sink(file = save_file_table, type = "output") -tt %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -# totals table -save_file_totals = "260-private-sector-totals.adoc" - -sink(file = save_file_totals, type = "output") -tt %>% -summarise( -Datasets = sum(Datasets), -`Occurrence records` = sum(`Occurrence records`), -`Data citations` = sum(`Data citations`) -) %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -} \ No newline at end of file diff --git a/.history/build-table-script/R/table_20240704104510.R b/.history/build-table-script/R/table_20240704104510.R deleted file mode 100644 index d28cc84..0000000 --- a/.history/build-table-script/R/table_20240704104510.R +++ /dev/null @@ -1,99 +0,0 @@ - -library(dplyr) -library(purrr) -library(gbifmt) # my library -# setwd("C:/Users/ftw712/Desktop/doc-private-sector-data-publishing/") - -# harvest publishers and datasets private sector publishers from directly from machineTags - -# dataset -ds_mt = get_mt("privateSector.gbif.org",type="dataset",limit=500) |> -mutate(link = paste0("https://www.gbif.org/dataset/",uuid)) %>% -mutate(pd = "dataset") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -# publisher -pb_mt = get_mt("privateSector.gbif.org",type="organization",limit=500) |> -mutate(link = paste0("https://www.gbif.org/publisher/",uuid)) %>% -mutate(pd = "publisher") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -ss = list(ds_mt,pb_mt) |> -bind_rows() |> -glimpse() - -# ss = readr::read_csv("build-table-script/data/source.tsv") %>% -# mutate(pd = ifelse(grepl("publisher",link),"publisher","dataset")) %>% -# mutate(key = gsub("https://www.gbif.org/publisher/","",link)) %>% -# mutate(key = gsub("https://www.gbif.org/dataset/","",key)) %>% -# glimpse() - -gbif_country = rgbif::enumeration_country() %>% select(Country=title,iso2) %>% glimpse() - -pp = ss %>% -dplyr::filter(pd == "publisher") %>% -glimpse() - -# select(key,`Activity sector`) %>% -# mutate(name = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingOrganization)) %>% -# mutate(`Occurrence records` = map_dbl(key,~ rgbif::occ_search(publishingOrg = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -# mutate(Datasets = map_dbl(key,~rgbif::dataset_search(publishingOrg= .x,limit=0)$meta$count)) %>% -# mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(publishingOrg = .x))) %>% -# mutate(Company = paste0("https://www.gbif.org/publisher/",key,"[",name,"]")) %>% -# mutate(iso2 = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -# merge(gbif_country,by="iso2") %>% -# glimpse() - -if(FALSE) { -dd = ss %>% -dplyr::filter(pd == "dataset") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::datasets(uuid=.x,limit=1)$data$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ -rgbif::occ_search(datasetKey = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = 1) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(datasetKey = .x))) %>% -mutate(Company = paste0("(https://www.gbif.org/dataset/",key,")[",name,"]")) %>% -mutate(p_key = map_chr(key,~ rgbif::datasets(uuid=.x,limit=1)$data$publishingOrganizationKey)) %>% -mutate(iso2 = map_chr(p_key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -select(-p_key) %>% -glimpse() - -# combine tables and clean up -tt = rbind(pp,dd) %>% -arrange(name) %>% -select(Company, `Activity sector`, Country, Datasets, `Occurrence records`, `Data citations`) %>% -glimpse() - -# save .adoc -save_file_table = "250-private-sector-table.adoc" - -# save and more cleanup -sink(file = save_file_table, type = "output") -tt %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -# totals table -save_file_totals = "260-private-sector-totals.adoc" - -sink(file = save_file_totals, type = "output") -tt %>% -summarise( -Datasets = sum(Datasets), -`Occurrence records` = sum(`Occurrence records`), -`Data citations` = sum(`Data citations`) -) %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -} \ No newline at end of file diff --git a/.history/build-table-script/R/table_20240704104514.R b/.history/build-table-script/R/table_20240704104514.R deleted file mode 100644 index f6ace29..0000000 --- a/.history/build-table-script/R/table_20240704104514.R +++ /dev/null @@ -1,99 +0,0 @@ - -library(dplyr) -library(purrr) -library(gbifmt) # my library -# setwd("C:/Users/ftw712/Desktop/doc-private-sector-data-publishing/") - -# harvest publishers and datasets private sector publishers directly from machineTags - -# dataset -ds_mt = get_mt("privateSector.gbif.org",type="dataset",limit=500) |> -mutate(link = paste0("https://www.gbif.org/dataset/",uuid)) %>% -mutate(pd = "dataset") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -# publisher -pb_mt = get_mt("privateSector.gbif.org",type="organization",limit=500) |> -mutate(link = paste0("https://www.gbif.org/publisher/",uuid)) %>% -mutate(pd = "publisher") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -ss = list(ds_mt,pb_mt) |> -bind_rows() |> -glimpse() - -# ss = readr::read_csv("build-table-script/data/source.tsv") %>% -# mutate(pd = ifelse(grepl("publisher",link),"publisher","dataset")) %>% -# mutate(key = gsub("https://www.gbif.org/publisher/","",link)) %>% -# mutate(key = gsub("https://www.gbif.org/dataset/","",key)) %>% -# glimpse() - -gbif_country = rgbif::enumeration_country() %>% select(Country=title,iso2) %>% glimpse() - -pp = ss %>% -dplyr::filter(pd == "publisher") %>% -glimpse() - -# select(key,`Activity sector`) %>% -# mutate(name = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingOrganization)) %>% -# mutate(`Occurrence records` = map_dbl(key,~ rgbif::occ_search(publishingOrg = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -# mutate(Datasets = map_dbl(key,~rgbif::dataset_search(publishingOrg= .x,limit=0)$meta$count)) %>% -# mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(publishingOrg = .x))) %>% -# mutate(Company = paste0("https://www.gbif.org/publisher/",key,"[",name,"]")) %>% -# mutate(iso2 = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -# merge(gbif_country,by="iso2") %>% -# glimpse() - -if(FALSE) { -dd = ss %>% -dplyr::filter(pd == "dataset") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::datasets(uuid=.x,limit=1)$data$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ -rgbif::occ_search(datasetKey = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = 1) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(datasetKey = .x))) %>% -mutate(Company = paste0("(https://www.gbif.org/dataset/",key,")[",name,"]")) %>% -mutate(p_key = map_chr(key,~ rgbif::datasets(uuid=.x,limit=1)$data$publishingOrganizationKey)) %>% -mutate(iso2 = map_chr(p_key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -select(-p_key) %>% -glimpse() - -# combine tables and clean up -tt = rbind(pp,dd) %>% -arrange(name) %>% -select(Company, `Activity sector`, Country, Datasets, `Occurrence records`, `Data citations`) %>% -glimpse() - -# save .adoc -save_file_table = "250-private-sector-table.adoc" - -# save and more cleanup -sink(file = save_file_table, type = "output") -tt %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -# totals table -save_file_totals = "260-private-sector-totals.adoc" - -sink(file = save_file_totals, type = "output") -tt %>% -summarise( -Datasets = sum(Datasets), -`Occurrence records` = sum(`Occurrence records`), -`Data citations` = sum(`Data citations`) -) %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -} \ No newline at end of file diff --git a/.history/build-table-script/R/table_20240704104654.R b/.history/build-table-script/R/table_20240704104654.R deleted file mode 100644 index ad73644..0000000 --- a/.history/build-table-script/R/table_20240704104654.R +++ /dev/null @@ -1,99 +0,0 @@ - -library(dplyr) -library(purrr) -library(gbifmt) # my library -# setwd("C:/Users/ftw712/Desktop/doc-private-sector-data-publishing/") - -# harvest publishers and datasets private sector publishers directly from machineTags - -# dataset -ds_mt = get_mt("privateSector.gbif.org",type="dataset",limit=500) |> -mutate(link = paste0("https://www.gbif.org/dataset/",uuid)) %>% -mutate(pd = "dataset") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -# publisher -pb_mt = get_mt("privateSector.gbif.org",type="organization",limit=500) |> -mutate(link = paste0("https://www.gbif.org/publisher/",uuid)) %>% -mutate(pd = "publisher") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -ss = list(ds_mt,pb_mt) |> -bind_rows() |> -glimpse() - -# ss = readr::read_csv("build-table-script/data/source.tsv") %>% -# mutate(pd = ifelse(grepl("publisher",link),"publisher","dataset")) %>% -# mutate(key = gsub("https://www.gbif.org/publisher/","",link)) %>% -# mutate(key = gsub("https://www.gbif.org/dataset/","",key)) %>% -# glimpse() - -gbif_country = rgbif::enumeration_country() %>% select(Country=title,iso2) %>% glimpse() - -pp = ss %>% -dplyr::filter(pd == "publisher") %>% -select(key,`Activity sector`) %>% -glimpse() - -# mutate(name = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingOrganization)) %>% -# mutate(`Occurrence records` = map_dbl(key,~ rgbif::occ_search(publishingOrg = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -# mutate(Datasets = map_dbl(key,~rgbif::dataset_search(publishingOrg= .x,limit=0)$meta$count)) %>% -# mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(publishingOrg = .x))) %>% -# mutate(Company = paste0("https://www.gbif.org/publisher/",key,"[",name,"]")) %>% -# mutate(iso2 = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -# merge(gbif_country,by="iso2") %>% -# glimpse() - -if(FALSE) { -dd = ss %>% -dplyr::filter(pd == "dataset") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::datasets(uuid=.x,limit=1)$data$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ -rgbif::occ_search(datasetKey = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = 1) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(datasetKey = .x))) %>% -mutate(Company = paste0("(https://www.gbif.org/dataset/",key,")[",name,"]")) %>% -mutate(p_key = map_chr(key,~ rgbif::datasets(uuid=.x,limit=1)$data$publishingOrganizationKey)) %>% -mutate(iso2 = map_chr(p_key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -select(-p_key) %>% -glimpse() - -# combine tables and clean up -tt = rbind(pp,dd) %>% -arrange(name) %>% -select(Company, `Activity sector`, Country, Datasets, `Occurrence records`, `Data citations`) %>% -glimpse() - -# save .adoc -save_file_table = "250-private-sector-table.adoc" - -# save and more cleanup -sink(file = save_file_table, type = "output") -tt %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -# totals table -save_file_totals = "260-private-sector-totals.adoc" - -sink(file = save_file_totals, type = "output") -tt %>% -summarise( -Datasets = sum(Datasets), -`Occurrence records` = sum(`Occurrence records`), -`Data citations` = sum(`Data citations`) -) %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -} \ No newline at end of file diff --git a/.history/build-table-script/R/table_20240704104703.R b/.history/build-table-script/R/table_20240704104703.R deleted file mode 100644 index 3a988df..0000000 --- a/.history/build-table-script/R/table_20240704104703.R +++ /dev/null @@ -1,99 +0,0 @@ - -library(dplyr) -library(purrr) -library(gbifmt) # my library -# setwd("C:/Users/ftw712/Desktop/doc-private-sector-data-publishing/") - -# harvest publishers and datasets private sector publishers directly from machineTags - -# dataset -ds_mt = get_mt("privateSector.gbif.org",type="dataset",limit=500) |> -mutate(link = paste0("https://www.gbif.org/dataset/",uuid)) %>% -mutate(pd = "dataset") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -# publisher -pb_mt = get_mt("privateSector.gbif.org",type="organization",limit=500) |> -mutate(link = paste0("https://www.gbif.org/publisher/",uuid)) %>% -mutate(pd = "publisher") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -ss = list(ds_mt,pb_mt) |> -bind_rows() |> -glimpse() - -# ss = readr::read_csv("build-table-script/data/source.tsv") %>% -# mutate(pd = ifelse(grepl("publisher",link),"publisher","dataset")) %>% -# mutate(key = gsub("https://www.gbif.org/publisher/","",link)) %>% -# mutate(key = gsub("https://www.gbif.org/dataset/","",key)) %>% -# glimpse() - -gbif_country = rgbif::enumeration_country() %>% select(Country=title,iso2) %>% glimpse() - -pp = ss %>% -dplyr::filter(pd == "publisher") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingOrganization)) %>% -glimpse() - -# mutate(`Occurrence records` = map_dbl(key,~ rgbif::occ_search(publishingOrg = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -# mutate(Datasets = map_dbl(key,~rgbif::dataset_search(publishingOrg= .x,limit=0)$meta$count)) %>% -# mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(publishingOrg = .x))) %>% -# mutate(Company = paste0("https://www.gbif.org/publisher/",key,"[",name,"]")) %>% -# mutate(iso2 = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -# merge(gbif_country,by="iso2") %>% -# glimpse() - -if(FALSE) { -dd = ss %>% -dplyr::filter(pd == "dataset") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::datasets(uuid=.x,limit=1)$data$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ -rgbif::occ_search(datasetKey = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = 1) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(datasetKey = .x))) %>% -mutate(Company = paste0("(https://www.gbif.org/dataset/",key,")[",name,"]")) %>% -mutate(p_key = map_chr(key,~ rgbif::datasets(uuid=.x,limit=1)$data$publishingOrganizationKey)) %>% -mutate(iso2 = map_chr(p_key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -select(-p_key) %>% -glimpse() - -# combine tables and clean up -tt = rbind(pp,dd) %>% -arrange(name) %>% -select(Company, `Activity sector`, Country, Datasets, `Occurrence records`, `Data citations`) %>% -glimpse() - -# save .adoc -save_file_table = "250-private-sector-table.adoc" - -# save and more cleanup -sink(file = save_file_table, type = "output") -tt %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -# totals table -save_file_totals = "260-private-sector-totals.adoc" - -sink(file = save_file_totals, type = "output") -tt %>% -summarise( -Datasets = sum(Datasets), -`Occurrence records` = sum(`Occurrence records`), -`Data citations` = sum(`Data citations`) -) %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -} \ No newline at end of file diff --git a/.history/build-table-script/R/table_20240704104717.R b/.history/build-table-script/R/table_20240704104717.R deleted file mode 100644 index fdb8235..0000000 --- a/.history/build-table-script/R/table_20240704104717.R +++ /dev/null @@ -1,100 +0,0 @@ - -library(dplyr) -library(purrr) -library(gbifmt) # my library -# setwd("C:/Users/ftw712/Desktop/doc-private-sector-data-publishing/") - -# harvest publishers and datasets private sector publishers directly from machineTags - -# dataset -ds_mt = get_mt("privateSector.gbif.org",type="dataset",limit=500) |> -mutate(link = paste0("https://www.gbif.org/dataset/",uuid)) %>% -mutate(pd = "dataset") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -# publisher -pb_mt = get_mt("privateSector.gbif.org",type="organization",limit=500) |> -mutate(link = paste0("https://www.gbif.org/publisher/",uuid)) %>% -mutate(pd = "publisher") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -ss = list(ds_mt,pb_mt) |> -bind_rows() |> -glimpse() - -# ss = readr::read_csv("build-table-script/data/source.tsv") %>% -# mutate(pd = ifelse(grepl("publisher",link),"publisher","dataset")) %>% -# mutate(key = gsub("https://www.gbif.org/publisher/","",link)) %>% -# mutate(key = gsub("https://www.gbif.org/dataset/","",key)) %>% -# glimpse() - -gbif_country = rgbif::enumeration_country() %>% select(Country=title,iso2) %>% glimpse() - -pp = ss %>% -dplyr::filter(pd == "publisher") %>% -select(key,`Activity sector`) %>% -head() |> -mutate(name = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingOrganization)) %>% -glimpse() - -# mutate(`Occurrence records` = map_dbl(key,~ rgbif::occ_search(publishingOrg = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -# mutate(Datasets = map_dbl(key,~rgbif::dataset_search(publishingOrg= .x,limit=0)$meta$count)) %>% -# mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(publishingOrg = .x))) %>% -# mutate(Company = paste0("https://www.gbif.org/publisher/",key,"[",name,"]")) %>% -# mutate(iso2 = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -# merge(gbif_country,by="iso2") %>% -# glimpse() - -if(FALSE) { -dd = ss %>% -dplyr::filter(pd == "dataset") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::datasets(uuid=.x,limit=1)$data$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ -rgbif::occ_search(datasetKey = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = 1) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(datasetKey = .x))) %>% -mutate(Company = paste0("(https://www.gbif.org/dataset/",key,")[",name,"]")) %>% -mutate(p_key = map_chr(key,~ rgbif::datasets(uuid=.x,limit=1)$data$publishingOrganizationKey)) %>% -mutate(iso2 = map_chr(p_key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -select(-p_key) %>% -glimpse() - -# combine tables and clean up -tt = rbind(pp,dd) %>% -arrange(name) %>% -select(Company, `Activity sector`, Country, Datasets, `Occurrence records`, `Data citations`) %>% -glimpse() - -# save .adoc -save_file_table = "250-private-sector-table.adoc" - -# save and more cleanup -sink(file = save_file_table, type = "output") -tt %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -# totals table -save_file_totals = "260-private-sector-totals.adoc" - -sink(file = save_file_totals, type = "output") -tt %>% -summarise( -Datasets = sum(Datasets), -`Occurrence records` = sum(`Occurrence records`), -`Data citations` = sum(`Data citations`) -) %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -} \ No newline at end of file diff --git a/.history/build-table-script/R/table_20240704104745.R b/.history/build-table-script/R/table_20240704104745.R deleted file mode 100644 index 7d75685..0000000 --- a/.history/build-table-script/R/table_20240704104745.R +++ /dev/null @@ -1,100 +0,0 @@ - -library(dplyr) -library(purrr) -library(gbifmt) # my library -# setwd("C:/Users/ftw712/Desktop/doc-private-sector-data-publishing/") - -# harvest publishers and datasets private sector publishers directly from machineTags - -# dataset -ds_mt = get_mt("privateSector.gbif.org",type="dataset",limit=500) |> -mutate(link = paste0("https://www.gbif.org/dataset/",uuid)) %>% -mutate(pd = "dataset") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -# publisher -pb_mt = get_mt("privateSector.gbif.org",type="organization",limit=500) |> -mutate(link = paste0("https://www.gbif.org/publisher/",uuid)) %>% -mutate(pd = "publisher") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -ss = list(ds_mt,pb_mt) |> -bind_rows() |> -glimpse() - -# ss = readr::read_csv("build-table-script/data/source.tsv") %>% -# mutate(pd = ifelse(grepl("publisher",link),"publisher","dataset")) %>% -# mutate(key = gsub("https://www.gbif.org/publisher/","",link)) %>% -# mutate(key = gsub("https://www.gbif.org/dataset/","",key)) %>% -# glimpse() - -gbif_country = rgbif::enumeration_country() %>% select(Country=title,iso2) %>% glimpse() - -pp = ss %>% -dplyr::filter(pd == "publisher") %>% -select(key,`Activity sector`) %>% -tail() |> -mutate(name = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingOrganization)) %>% -glimpse() - -# mutate(`Occurrence records` = map_dbl(key,~ rgbif::occ_search(publishingOrg = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -# mutate(Datasets = map_dbl(key,~rgbif::dataset_search(publishingOrg= .x,limit=0)$meta$count)) %>% -# mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(publishingOrg = .x))) %>% -# mutate(Company = paste0("https://www.gbif.org/publisher/",key,"[",name,"]")) %>% -# mutate(iso2 = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -# merge(gbif_country,by="iso2") %>% -# glimpse() - -if(FALSE) { -dd = ss %>% -dplyr::filter(pd == "dataset") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::datasets(uuid=.x,limit=1)$data$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ -rgbif::occ_search(datasetKey = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = 1) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(datasetKey = .x))) %>% -mutate(Company = paste0("(https://www.gbif.org/dataset/",key,")[",name,"]")) %>% -mutate(p_key = map_chr(key,~ rgbif::datasets(uuid=.x,limit=1)$data$publishingOrganizationKey)) %>% -mutate(iso2 = map_chr(p_key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -select(-p_key) %>% -glimpse() - -# combine tables and clean up -tt = rbind(pp,dd) %>% -arrange(name) %>% -select(Company, `Activity sector`, Country, Datasets, `Occurrence records`, `Data citations`) %>% -glimpse() - -# save .adoc -save_file_table = "250-private-sector-table.adoc" - -# save and more cleanup -sink(file = save_file_table, type = "output") -tt %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -# totals table -save_file_totals = "260-private-sector-totals.adoc" - -sink(file = save_file_totals, type = "output") -tt %>% -summarise( -Datasets = sum(Datasets), -`Occurrence records` = sum(`Occurrence records`), -`Data citations` = sum(`Data citations`) -) %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -} \ No newline at end of file diff --git a/.history/build-table-script/R/table_20240704104816.R b/.history/build-table-script/R/table_20240704104816.R deleted file mode 100644 index 916f0d1..0000000 --- a/.history/build-table-script/R/table_20240704104816.R +++ /dev/null @@ -1,102 +0,0 @@ - -library(dplyr) -library(purrr) -library(gbifmt) # my library -# setwd("C:/Users/ftw712/Desktop/doc-private-sector-data-publishing/") - -# harvest publishers and datasets private sector publishers directly from machineTags - -# dataset -ds_mt = get_mt("privateSector.gbif.org",type="dataset",limit=500) |> -mutate(link = paste0("https://www.gbif.org/dataset/",uuid)) %>% -mutate(pd = "dataset") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -# publisher -pb_mt = get_mt("privateSector.gbif.org",type="organization",limit=500) |> -mutate(link = paste0("https://www.gbif.org/publisher/",uuid)) %>% -mutate(pd = "publisher") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -ss = list(ds_mt,pb_mt) |> -bind_rows() |> -glimpse() - -# ss = readr::read_csv("build-table-script/data/source.tsv") %>% -# mutate(pd = ifelse(grepl("publisher",link),"publisher","dataset")) %>% -# mutate(key = gsub("https://www.gbif.org/publisher/","",link)) %>% -# mutate(key = gsub("https://www.gbif.org/dataset/","",key)) %>% -# glimpse() - -gbif_country = rgbif::enumeration_country() %>% select(Country=title,iso2) %>% glimpse() - -pp = ss %>% -dplyr::filter(pd == "publisher") %>% -select(key,`Activity sector`) %>% -tail() |> -72e23311-b65a-46d0-bc07-ff0a251b47e1 - -mutate(name = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingOrganization)) %>% -glimpse() - -# mutate(`Occurrence records` = map_dbl(key,~ rgbif::occ_search(publishingOrg = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -# mutate(Datasets = map_dbl(key,~rgbif::dataset_search(publishingOrg= .x,limit=0)$meta$count)) %>% -# mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(publishingOrg = .x))) %>% -# mutate(Company = paste0("https://www.gbif.org/publisher/",key,"[",name,"]")) %>% -# mutate(iso2 = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -# merge(gbif_country,by="iso2") %>% -# glimpse() - -if(FALSE) { -dd = ss %>% -dplyr::filter(pd == "dataset") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::datasets(uuid=.x,limit=1)$data$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ -rgbif::occ_search(datasetKey = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = 1) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(datasetKey = .x))) %>% -mutate(Company = paste0("(https://www.gbif.org/dataset/",key,")[",name,"]")) %>% -mutate(p_key = map_chr(key,~ rgbif::datasets(uuid=.x,limit=1)$data$publishingOrganizationKey)) %>% -mutate(iso2 = map_chr(p_key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -select(-p_key) %>% -glimpse() - -# combine tables and clean up -tt = rbind(pp,dd) %>% -arrange(name) %>% -select(Company, `Activity sector`, Country, Datasets, `Occurrence records`, `Data citations`) %>% -glimpse() - -# save .adoc -save_file_table = "250-private-sector-table.adoc" - -# save and more cleanup -sink(file = save_file_table, type = "output") -tt %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -# totals table -save_file_totals = "260-private-sector-totals.adoc" - -sink(file = save_file_totals, type = "output") -tt %>% -summarise( -Datasets = sum(Datasets), -`Occurrence records` = sum(`Occurrence records`), -`Data citations` = sum(`Data citations`) -) %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -} \ No newline at end of file diff --git a/.history/build-table-script/R/table_20240704104837.R b/.history/build-table-script/R/table_20240704104837.R deleted file mode 100644 index cfd025a..0000000 --- a/.history/build-table-script/R/table_20240704104837.R +++ /dev/null @@ -1,102 +0,0 @@ - -library(dplyr) -library(purrr) -library(gbifmt) # my library -# setwd("C:/Users/ftw712/Desktop/doc-private-sector-data-publishing/") - -# harvest publishers and datasets private sector publishers directly from machineTags - -# dataset -ds_mt = get_mt("privateSector.gbif.org",type="dataset",limit=500) |> -mutate(link = paste0("https://www.gbif.org/dataset/",uuid)) %>% -mutate(pd = "dataset") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -# publisher -pb_mt = get_mt("privateSector.gbif.org",type="organization",limit=500) |> -mutate(link = paste0("https://www.gbif.org/publisher/",uuid)) %>% -mutate(pd = "publisher") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -ss = list(ds_mt,pb_mt) |> -bind_rows() |> -glimpse() - -# ss = readr::read_csv("build-table-script/data/source.tsv") %>% -# mutate(pd = ifelse(grepl("publisher",link),"publisher","dataset")) %>% -# mutate(key = gsub("https://www.gbif.org/publisher/","",link)) %>% -# mutate(key = gsub("https://www.gbif.org/dataset/","",key)) %>% -# glimpse() - -gbif_country = rgbif::enumeration_country() %>% select(Country=title,iso2) %>% glimpse() - -pp = ss %>% -dplyr::filter(pd == "publisher") %>% -select(key,`Activity sector`) %>% -tail() |> -72e23311-b65a-46d0-bc07-ff0a251b47e1 -rgbif::dataset_search(publishingOrg="72e23311-b65a-46d0-bc07-ff0a251b47e1",limit=1)$data -# mutate(name = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingOrganization)) %>% -# glimpse() - -# mutate(`Occurrence records` = map_dbl(key,~ rgbif::occ_search(publishingOrg = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -# mutate(Datasets = map_dbl(key,~rgbif::dataset_search(publishingOrg= .x,limit=0)$meta$count)) %>% -# mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(publishingOrg = .x))) %>% -# mutate(Company = paste0("https://www.gbif.org/publisher/",key,"[",name,"]")) %>% -# mutate(iso2 = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -# merge(gbif_country,by="iso2") %>% -# glimpse() - -if(FALSE) { -dd = ss %>% -dplyr::filter(pd == "dataset") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::datasets(uuid=.x,limit=1)$data$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ -rgbif::occ_search(datasetKey = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = 1) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(datasetKey = .x))) %>% -mutate(Company = paste0("(https://www.gbif.org/dataset/",key,")[",name,"]")) %>% -mutate(p_key = map_chr(key,~ rgbif::datasets(uuid=.x,limit=1)$data$publishingOrganizationKey)) %>% -mutate(iso2 = map_chr(p_key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -select(-p_key) %>% -glimpse() - -# combine tables and clean up -tt = rbind(pp,dd) %>% -arrange(name) %>% -select(Company, `Activity sector`, Country, Datasets, `Occurrence records`, `Data citations`) %>% -glimpse() - -# save .adoc -save_file_table = "250-private-sector-table.adoc" - -# save and more cleanup -sink(file = save_file_table, type = "output") -tt %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -# totals table -save_file_totals = "260-private-sector-totals.adoc" - -sink(file = save_file_totals, type = "output") -tt %>% -summarise( -Datasets = sum(Datasets), -`Occurrence records` = sum(`Occurrence records`), -`Data citations` = sum(`Data citations`) -) %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -} \ No newline at end of file diff --git a/.history/build-table-script/R/table_20240704104841.R b/.history/build-table-script/R/table_20240704104841.R deleted file mode 100644 index 75fd0ff..0000000 --- a/.history/build-table-script/R/table_20240704104841.R +++ /dev/null @@ -1,102 +0,0 @@ - -library(dplyr) -library(purrr) -library(gbifmt) # my library -# setwd("C:/Users/ftw712/Desktop/doc-private-sector-data-publishing/") - -# harvest publishers and datasets private sector publishers directly from machineTags - -# dataset -ds_mt = get_mt("privateSector.gbif.org",type="dataset",limit=500) |> -mutate(link = paste0("https://www.gbif.org/dataset/",uuid)) %>% -mutate(pd = "dataset") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -# publisher -pb_mt = get_mt("privateSector.gbif.org",type="organization",limit=500) |> -mutate(link = paste0("https://www.gbif.org/publisher/",uuid)) %>% -mutate(pd = "publisher") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -ss = list(ds_mt,pb_mt) |> -bind_rows() |> -glimpse() - -# ss = readr::read_csv("build-table-script/data/source.tsv") %>% -# mutate(pd = ifelse(grepl("publisher",link),"publisher","dataset")) %>% -# mutate(key = gsub("https://www.gbif.org/publisher/","",link)) %>% -# mutate(key = gsub("https://www.gbif.org/dataset/","",key)) %>% -# glimpse() - -gbif_country = rgbif::enumeration_country() %>% select(Country=title,iso2) %>% glimpse() - -pp = ss %>% -dplyr::filter(pd == "publisher") %>% -select(key,`Activity sector`) -# tail() |> -# 72e23311-b65a-46d0-bc07-ff0a251b47e1 -rgbif::dataset_search(publishingOrg="72e23311-b65a-46d0-bc07-ff0a251b47e1",limit=1)$data -# mutate(name = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingOrganization)) %>% -# glimpse() - -# mutate(`Occurrence records` = map_dbl(key,~ rgbif::occ_search(publishingOrg = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -# mutate(Datasets = map_dbl(key,~rgbif::dataset_search(publishingOrg= .x,limit=0)$meta$count)) %>% -# mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(publishingOrg = .x))) %>% -# mutate(Company = paste0("https://www.gbif.org/publisher/",key,"[",name,"]")) %>% -# mutate(iso2 = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -# merge(gbif_country,by="iso2") %>% -# glimpse() - -if(FALSE) { -dd = ss %>% -dplyr::filter(pd == "dataset") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::datasets(uuid=.x,limit=1)$data$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ -rgbif::occ_search(datasetKey = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = 1) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(datasetKey = .x))) %>% -mutate(Company = paste0("(https://www.gbif.org/dataset/",key,")[",name,"]")) %>% -mutate(p_key = map_chr(key,~ rgbif::datasets(uuid=.x,limit=1)$data$publishingOrganizationKey)) %>% -mutate(iso2 = map_chr(p_key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -select(-p_key) %>% -glimpse() - -# combine tables and clean up -tt = rbind(pp,dd) %>% -arrange(name) %>% -select(Company, `Activity sector`, Country, Datasets, `Occurrence records`, `Data citations`) %>% -glimpse() - -# save .adoc -save_file_table = "250-private-sector-table.adoc" - -# save and more cleanup -sink(file = save_file_table, type = "output") -tt %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -# totals table -save_file_totals = "260-private-sector-totals.adoc" - -sink(file = save_file_totals, type = "output") -tt %>% -summarise( -Datasets = sum(Datasets), -`Occurrence records` = sum(`Occurrence records`), -`Data citations` = sum(`Data citations`) -) %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -} \ No newline at end of file diff --git a/.history/build-table-script/R/table_20240704104853.R b/.history/build-table-script/R/table_20240704104853.R deleted file mode 100644 index 6af2405..0000000 --- a/.history/build-table-script/R/table_20240704104853.R +++ /dev/null @@ -1,103 +0,0 @@ - -library(dplyr) -library(purrr) -library(gbifmt) # my library -# setwd("C:/Users/ftw712/Desktop/doc-private-sector-data-publishing/") - -# harvest publishers and datasets private sector publishers directly from machineTags - -# dataset -ds_mt = get_mt("privateSector.gbif.org",type="dataset",limit=500) |> -mutate(link = paste0("https://www.gbif.org/dataset/",uuid)) %>% -mutate(pd = "dataset") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -# publisher -pb_mt = get_mt("privateSector.gbif.org",type="organization",limit=500) |> -mutate(link = paste0("https://www.gbif.org/publisher/",uuid)) %>% -mutate(pd = "publisher") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -ss = list(ds_mt,pb_mt) |> -bind_rows() |> -glimpse() - -# ss = readr::read_csv("build-table-script/data/source.tsv") %>% -# mutate(pd = ifelse(grepl("publisher",link),"publisher","dataset")) %>% -# mutate(key = gsub("https://www.gbif.org/publisher/","",link)) %>% -# mutate(key = gsub("https://www.gbif.org/dataset/","",key)) %>% -# glimpse() - -gbif_country = rgbif::enumeration_country() %>% select(Country=title,iso2) %>% glimpse() - -pp = ss %>% -dplyr::filter(pd == "publisher") %>% -select(key,`Activity sector`) -# tail() |> -# 72e23311-b65a-46d0-bc07-ff0a251b47e1 -rgbif::dataset_search(publishingOrg="72e23311-b65a-46d0-bc07-ff0a251b47e1",limit=1)$data - -# mutate(name = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingOrganization)) %>% -# glimpse() - -# mutate(`Occurrence records` = map_dbl(key,~ rgbif::occ_search(publishingOrg = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -# mutate(Datasets = map_dbl(key,~rgbif::dataset_search(publishingOrg= .x,limit=0)$meta$count)) %>% -# mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(publishingOrg = .x))) %>% -# mutate(Company = paste0("https://www.gbif.org/publisher/",key,"[",name,"]")) %>% -# mutate(iso2 = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -# merge(gbif_country,by="iso2") %>% -# glimpse() - -if(FALSE) { -dd = ss %>% -dplyr::filter(pd == "dataset") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::datasets(uuid=.x,limit=1)$data$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ -rgbif::occ_search(datasetKey = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = 1) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(datasetKey = .x))) %>% -mutate(Company = paste0("(https://www.gbif.org/dataset/",key,")[",name,"]")) %>% -mutate(p_key = map_chr(key,~ rgbif::datasets(uuid=.x,limit=1)$data$publishingOrganizationKey)) %>% -mutate(iso2 = map_chr(p_key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -select(-p_key) %>% -glimpse() - -# combine tables and clean up -tt = rbind(pp,dd) %>% -arrange(name) %>% -select(Company, `Activity sector`, Country, Datasets, `Occurrence records`, `Data citations`) %>% -glimpse() - -# save .adoc -save_file_table = "250-private-sector-table.adoc" - -# save and more cleanup -sink(file = save_file_table, type = "output") -tt %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -# totals table -save_file_totals = "260-private-sector-totals.adoc" - -sink(file = save_file_totals, type = "output") -tt %>% -summarise( -Datasets = sum(Datasets), -`Occurrence records` = sum(`Occurrence records`), -`Data citations` = sum(`Data citations`) -) %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -} \ No newline at end of file diff --git a/.history/build-table-script/R/table_20240704105458.R b/.history/build-table-script/R/table_20240704105458.R deleted file mode 100644 index abb429c..0000000 --- a/.history/build-table-script/R/table_20240704105458.R +++ /dev/null @@ -1,103 +0,0 @@ - -library(dplyr) -library(purrr) -library(gbifmt) # my library -# setwd("C:/Users/ftw712/Desktop/doc-private-sector-data-publishing/") - -# harvest publishers and datasets private sector publishers directly from machineTags - -# dataset -ds_mt = get_mt("privateSector.gbif.org",type="dataset",limit=500) |> -mutate(link = paste0("https://www.gbif.org/dataset/",uuid)) %>% -mutate(pd = "dataset") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -# publisher -pb_mt = get_mt("privateSector.gbif.org",type="organization",limit=500) |> -mutate(link = paste0("https://www.gbif.org/publisher/",uuid)) %>% -mutate(pd = "publisher") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -ss = list(ds_mt,pb_mt) |> -bind_rows() |> -glimpse() - -# ss = readr::read_csv("build-table-script/data/source.tsv") %>% -# mutate(pd = ifelse(grepl("publisher",link),"publisher","dataset")) %>% -# mutate(key = gsub("https://www.gbif.org/publisher/","",link)) %>% -# mutate(key = gsub("https://www.gbif.org/dataset/","",key)) %>% -# glimpse() - -gbif_country = rgbif::enumeration_country() %>% select(Country=title,iso2) %>% glimpse() - -pp = ss %>% -dplyr::filter(pd == "publisher") %>% -select(key,`Activity sector`) -# tail() |> -# 72e23311-b65a-46d0-bc07-ff0a251b47e1 -rgbif::dataset_search(publishingOrg="72e23311-b65a-46d0-bc07-ff0a251b47e1",limit=1) - -# mutate(name = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingOrganization)) %>% -# glimpse() - -# mutate(`Occurrence records` = map_dbl(key,~ rgbif::occ_search(publishingOrg = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -# mutate(Datasets = map_dbl(key,~rgbif::dataset_search(publishingOrg= .x,limit=0)$meta$count)) %>% -# mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(publishingOrg = .x))) %>% -# mutate(Company = paste0("https://www.gbif.org/publisher/",key,"[",name,"]")) %>% -# mutate(iso2 = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -# merge(gbif_country,by="iso2") %>% -# glimpse() - -if(FALSE) { -dd = ss %>% -dplyr::filter(pd == "dataset") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::datasets(uuid=.x,limit=1)$data$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ -rgbif::occ_search(datasetKey = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = 1) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(datasetKey = .x))) %>% -mutate(Company = paste0("(https://www.gbif.org/dataset/",key,")[",name,"]")) %>% -mutate(p_key = map_chr(key,~ rgbif::datasets(uuid=.x,limit=1)$data$publishingOrganizationKey)) %>% -mutate(iso2 = map_chr(p_key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -select(-p_key) %>% -glimpse() - -# combine tables and clean up -tt = rbind(pp,dd) %>% -arrange(name) %>% -select(Company, `Activity sector`, Country, Datasets, `Occurrence records`, `Data citations`) %>% -glimpse() - -# save .adoc -save_file_table = "250-private-sector-table.adoc" - -# save and more cleanup -sink(file = save_file_table, type = "output") -tt %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -# totals table -save_file_totals = "260-private-sector-totals.adoc" - -sink(file = save_file_totals, type = "output") -tt %>% -summarise( -Datasets = sum(Datasets), -`Occurrence records` = sum(`Occurrence records`), -`Data citations` = sum(`Data citations`) -) %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -} \ No newline at end of file diff --git a/.history/build-table-script/R/table_20240704105503.R b/.history/build-table-script/R/table_20240704105503.R deleted file mode 100644 index 66dcb94..0000000 --- a/.history/build-table-script/R/table_20240704105503.R +++ /dev/null @@ -1,103 +0,0 @@ - -library(dplyr) -library(purrr) -library(gbifmt) # my library -# setwd("C:/Users/ftw712/Desktop/doc-private-sector-data-publishing/") - -# harvest publishers and datasets private sector publishers directly from machineTags - -# dataset -ds_mt = get_mt("privateSector.gbif.org",type="dataset",limit=500) |> -mutate(link = paste0("https://www.gbif.org/dataset/",uuid)) %>% -mutate(pd = "dataset") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -# publisher -pb_mt = get_mt("privateSector.gbif.org",type="organization",limit=500) |> -mutate(link = paste0("https://www.gbif.org/publisher/",uuid)) %>% -mutate(pd = "publisher") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -ss = list(ds_mt,pb_mt) |> -bind_rows() |> -glimpse() - -# ss = readr::read_csv("build-table-script/data/source.tsv") %>% -# mutate(pd = ifelse(grepl("publisher",link),"publisher","dataset")) %>% -# mutate(key = gsub("https://www.gbif.org/publisher/","",link)) %>% -# mutate(key = gsub("https://www.gbif.org/dataset/","",key)) %>% -# glimpse() - -gbif_country = rgbif::enumeration_country() %>% select(Country=title,iso2) %>% glimpse() - -pp = ss %>% -dplyr::filter(pd == "publisher") %>% -select(key,`Activity sector`) -# tail() |> -# 72e23311-b65a-46d0-bc07-ff0a251b47e1 -rgbif::dataset_search(publishingOrg="72e23311-b65a-46d0-bc07-ff0a251b47e1") - -# mutate(name = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingOrganization)) %>% -# glimpse() - -# mutate(`Occurrence records` = map_dbl(key,~ rgbif::occ_search(publishingOrg = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -# mutate(Datasets = map_dbl(key,~rgbif::dataset_search(publishingOrg= .x,limit=0)$meta$count)) %>% -# mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(publishingOrg = .x))) %>% -# mutate(Company = paste0("https://www.gbif.org/publisher/",key,"[",name,"]")) %>% -# mutate(iso2 = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -# merge(gbif_country,by="iso2") %>% -# glimpse() - -if(FALSE) { -dd = ss %>% -dplyr::filter(pd == "dataset") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::datasets(uuid=.x,limit=1)$data$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ -rgbif::occ_search(datasetKey = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = 1) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(datasetKey = .x))) %>% -mutate(Company = paste0("(https://www.gbif.org/dataset/",key,")[",name,"]")) %>% -mutate(p_key = map_chr(key,~ rgbif::datasets(uuid=.x,limit=1)$data$publishingOrganizationKey)) %>% -mutate(iso2 = map_chr(p_key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -select(-p_key) %>% -glimpse() - -# combine tables and clean up -tt = rbind(pp,dd) %>% -arrange(name) %>% -select(Company, `Activity sector`, Country, Datasets, `Occurrence records`, `Data citations`) %>% -glimpse() - -# save .adoc -save_file_table = "250-private-sector-table.adoc" - -# save and more cleanup -sink(file = save_file_table, type = "output") -tt %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -# totals table -save_file_totals = "260-private-sector-totals.adoc" - -sink(file = save_file_totals, type = "output") -tt %>% -summarise( -Datasets = sum(Datasets), -`Occurrence records` = sum(`Occurrence records`), -`Data citations` = sum(`Data citations`) -) %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -} \ No newline at end of file diff --git a/.history/build-table-script/R/table_20240704105640.R b/.history/build-table-script/R/table_20240704105640.R deleted file mode 100644 index 7aab567..0000000 --- a/.history/build-table-script/R/table_20240704105640.R +++ /dev/null @@ -1,103 +0,0 @@ - -library(dplyr) -library(purrr) -library(gbifmt) # my library -# setwd("C:/Users/ftw712/Desktop/doc-private-sector-data-publishing/") - -# harvest publishers and datasets private sector publishers directly from machineTags - -# dataset -ds_mt = get_mt("privateSector.gbif.org",type="dataset",limit=500) |> -mutate(link = paste0("https://www.gbif.org/dataset/",uuid)) %>% -mutate(pd = "dataset") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -# publisher -pb_mt = get_mt("privateSector.gbif.org",type="organization",limit=500) |> -mutate(link = paste0("https://www.gbif.org/publisher/",uuid)) %>% -mutate(pd = "publisher") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -ss = list(ds_mt,pb_mt) |> -bind_rows() |> -glimpse() - -# ss = readr::read_csv("build-table-script/data/source.tsv") %>% -# mutate(pd = ifelse(grepl("publisher",link),"publisher","dataset")) %>% -# mutate(key = gsub("https://www.gbif.org/publisher/","",link)) %>% -# mutate(key = gsub("https://www.gbif.org/dataset/","",key)) %>% -# glimpse() - -gbif_country = rgbif::enumeration_country() %>% select(Country=title,iso2) %>% glimpse() - -pp = ss %>% -dplyr::filter(pd == "publisher") %>% -select(key,`Activity sector`) -# tail() |> -# 72e23311-b65a-46d0-bc07-ff0a251b47e1 -rgbif::dataset_search(publishingOrg="72e23311-b65a-46d0-bc07-ff0a251b47e1") -rgbif::organizations(uuid="72e23311-b65a-46d0-bc07-ff0a251b47e1") -# mutate(name = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingOrganization)) %>% -# glimpse() - -# mutate(`Occurrence records` = map_dbl(key,~ rgbif::occ_search(publishingOrg = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -# mutate(Datasets = map_dbl(key,~rgbif::dataset_search(publishingOrg= .x,limit=0)$meta$count)) %>% -# mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(publishingOrg = .x))) %>% -# mutate(Company = paste0("https://www.gbif.org/publisher/",key,"[",name,"]")) %>% -# mutate(iso2 = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -# merge(gbif_country,by="iso2") %>% -# glimpse() - -if(FALSE) { -dd = ss %>% -dplyr::filter(pd == "dataset") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::datasets(uuid=.x,limit=1)$data$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ -rgbif::occ_search(datasetKey = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = 1) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(datasetKey = .x))) %>% -mutate(Company = paste0("(https://www.gbif.org/dataset/",key,")[",name,"]")) %>% -mutate(p_key = map_chr(key,~ rgbif::datasets(uuid=.x,limit=1)$data$publishingOrganizationKey)) %>% -mutate(iso2 = map_chr(p_key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -select(-p_key) %>% -glimpse() - -# combine tables and clean up -tt = rbind(pp,dd) %>% -arrange(name) %>% -select(Company, `Activity sector`, Country, Datasets, `Occurrence records`, `Data citations`) %>% -glimpse() - -# save .adoc -save_file_table = "250-private-sector-table.adoc" - -# save and more cleanup -sink(file = save_file_table, type = "output") -tt %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -# totals table -save_file_totals = "260-private-sector-totals.adoc" - -sink(file = save_file_totals, type = "output") -tt %>% -summarise( -Datasets = sum(Datasets), -`Occurrence records` = sum(`Occurrence records`), -`Data citations` = sum(`Data citations`) -) %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -} \ No newline at end of file diff --git a/.history/build-table-script/R/table_20240704111427.R b/.history/build-table-script/R/table_20240704111427.R deleted file mode 100644 index 09f9e15..0000000 --- a/.history/build-table-script/R/table_20240704111427.R +++ /dev/null @@ -1,103 +0,0 @@ - -library(dplyr) -library(purrr) -library(gbifmt) # my library -# setwd("C:/Users/ftw712/Desktop/doc-private-sector-data-publishing/") - -# harvest publishers and datasets private sector publishers directly from machineTags - -# dataset -ds_mt = get_mt("privateSector.gbif.org",type="dataset",limit=500) |> -mutate(link = paste0("https://www.gbif.org/dataset/",uuid)) %>% -mutate(pd = "dataset") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -# publisher -pb_mt = get_mt("privateSector.gbif.org",type="organization",limit=500) |> -mutate(link = paste0("https://www.gbif.org/publisher/",uuid)) %>% -mutate(pd = "publisher") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -ss = list(ds_mt,pb_mt) |> -bind_rows() |> -glimpse() - -# ss = readr::read_csv("build-table-script/data/source.tsv") %>% -# mutate(pd = ifelse(grepl("publisher",link),"publisher","dataset")) %>% -# mutate(key = gsub("https://www.gbif.org/publisher/","",link)) %>% -# mutate(key = gsub("https://www.gbif.org/dataset/","",key)) %>% -# glimpse() - -gbif_country = rgbif::enumeration_country() %>% select(Country=title,iso2) %>% glimpse() - -pp = ss %>% -dplyr::filter(pd == "publisher") %>% -select(key,`Activity sector`) -# tail() |> -# 72e23311-b65a-46d0-bc07-ff0a251b47e1 -# rgbif::dataset_search(publishingOrg="72e23311-b65a-46d0-bc07-ff0a251b47e1") -rgbif::organizations(uuid="72e23311-b65a-46d0-bc07-ff0a251b47e1") -# mutate(name = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingOrganization)) %>% -# glimpse() - -# mutate(`Occurrence records` = map_dbl(key,~ rgbif::occ_search(publishingOrg = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -# mutate(Datasets = map_dbl(key,~rgbif::dataset_search(publishingOrg= .x,limit=0)$meta$count)) %>% -# mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(publishingOrg = .x))) %>% -# mutate(Company = paste0("https://www.gbif.org/publisher/",key,"[",name,"]")) %>% -# mutate(iso2 = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -# merge(gbif_country,by="iso2") %>% -# glimpse() - -if(FALSE) { -dd = ss %>% -dplyr::filter(pd == "dataset") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::datasets(uuid=.x,limit=1)$data$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ -rgbif::occ_search(datasetKey = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = 1) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(datasetKey = .x))) %>% -mutate(Company = paste0("(https://www.gbif.org/dataset/",key,")[",name,"]")) %>% -mutate(p_key = map_chr(key,~ rgbif::datasets(uuid=.x,limit=1)$data$publishingOrganizationKey)) %>% -mutate(iso2 = map_chr(p_key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -select(-p_key) %>% -glimpse() - -# combine tables and clean up -tt = rbind(pp,dd) %>% -arrange(name) %>% -select(Company, `Activity sector`, Country, Datasets, `Occurrence records`, `Data citations`) %>% -glimpse() - -# save .adoc -save_file_table = "250-private-sector-table.adoc" - -# save and more cleanup -sink(file = save_file_table, type = "output") -tt %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -# totals table -save_file_totals = "260-private-sector-totals.adoc" - -sink(file = save_file_totals, type = "output") -tt %>% -summarise( -Datasets = sum(Datasets), -`Occurrence records` = sum(`Occurrence records`), -`Data citations` = sum(`Data citations`) -) %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -} \ No newline at end of file diff --git a/.history/build-table-script/R/table_20240704111524.R b/.history/build-table-script/R/table_20240704111524.R deleted file mode 100644 index 09f9e15..0000000 --- a/.history/build-table-script/R/table_20240704111524.R +++ /dev/null @@ -1,103 +0,0 @@ - -library(dplyr) -library(purrr) -library(gbifmt) # my library -# setwd("C:/Users/ftw712/Desktop/doc-private-sector-data-publishing/") - -# harvest publishers and datasets private sector publishers directly from machineTags - -# dataset -ds_mt = get_mt("privateSector.gbif.org",type="dataset",limit=500) |> -mutate(link = paste0("https://www.gbif.org/dataset/",uuid)) %>% -mutate(pd = "dataset") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -# publisher -pb_mt = get_mt("privateSector.gbif.org",type="organization",limit=500) |> -mutate(link = paste0("https://www.gbif.org/publisher/",uuid)) %>% -mutate(pd = "publisher") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -ss = list(ds_mt,pb_mt) |> -bind_rows() |> -glimpse() - -# ss = readr::read_csv("build-table-script/data/source.tsv") %>% -# mutate(pd = ifelse(grepl("publisher",link),"publisher","dataset")) %>% -# mutate(key = gsub("https://www.gbif.org/publisher/","",link)) %>% -# mutate(key = gsub("https://www.gbif.org/dataset/","",key)) %>% -# glimpse() - -gbif_country = rgbif::enumeration_country() %>% select(Country=title,iso2) %>% glimpse() - -pp = ss %>% -dplyr::filter(pd == "publisher") %>% -select(key,`Activity sector`) -# tail() |> -# 72e23311-b65a-46d0-bc07-ff0a251b47e1 -# rgbif::dataset_search(publishingOrg="72e23311-b65a-46d0-bc07-ff0a251b47e1") -rgbif::organizations(uuid="72e23311-b65a-46d0-bc07-ff0a251b47e1") -# mutate(name = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingOrganization)) %>% -# glimpse() - -# mutate(`Occurrence records` = map_dbl(key,~ rgbif::occ_search(publishingOrg = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -# mutate(Datasets = map_dbl(key,~rgbif::dataset_search(publishingOrg= .x,limit=0)$meta$count)) %>% -# mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(publishingOrg = .x))) %>% -# mutate(Company = paste0("https://www.gbif.org/publisher/",key,"[",name,"]")) %>% -# mutate(iso2 = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -# merge(gbif_country,by="iso2") %>% -# glimpse() - -if(FALSE) { -dd = ss %>% -dplyr::filter(pd == "dataset") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::datasets(uuid=.x,limit=1)$data$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ -rgbif::occ_search(datasetKey = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = 1) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(datasetKey = .x))) %>% -mutate(Company = paste0("(https://www.gbif.org/dataset/",key,")[",name,"]")) %>% -mutate(p_key = map_chr(key,~ rgbif::datasets(uuid=.x,limit=1)$data$publishingOrganizationKey)) %>% -mutate(iso2 = map_chr(p_key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -select(-p_key) %>% -glimpse() - -# combine tables and clean up -tt = rbind(pp,dd) %>% -arrange(name) %>% -select(Company, `Activity sector`, Country, Datasets, `Occurrence records`, `Data citations`) %>% -glimpse() - -# save .adoc -save_file_table = "250-private-sector-table.adoc" - -# save and more cleanup -sink(file = save_file_table, type = "output") -tt %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -# totals table -save_file_totals = "260-private-sector-totals.adoc" - -sink(file = save_file_totals, type = "output") -tt %>% -summarise( -Datasets = sum(Datasets), -`Occurrence records` = sum(`Occurrence records`), -`Data citations` = sum(`Data citations`) -) %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -} \ No newline at end of file diff --git a/.history/build-table-script/R/table_20240704111545.R b/.history/build-table-script/R/table_20240704111545.R deleted file mode 100644 index 73eba13..0000000 --- a/.history/build-table-script/R/table_20240704111545.R +++ /dev/null @@ -1,104 +0,0 @@ - -library(dplyr) -library(purrr) -library(gbifmt) # my library -# setwd("C:/Users/ftw712/Desktop/doc-private-sector-data-publishing/") - -# harvest publishers and datasets private sector publishers directly from machineTags - -# dataset -ds_mt = get_mt("privateSector.gbif.org",type="dataset",limit=500) |> -mutate(link = paste0("https://www.gbif.org/dataset/",uuid)) %>% -mutate(pd = "dataset") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -# publisher -pb_mt = get_mt("privateSector.gbif.org",type="organization",limit=500) |> -mutate(link = paste0("https://www.gbif.org/publisher/",uuid)) %>% -mutate(pd = "publisher") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -ss = list(ds_mt,pb_mt) |> -bind_rows() |> -glimpse() - -# ss = readr::read_csv("build-table-script/data/source.tsv") %>% -# mutate(pd = ifelse(grepl("publisher",link),"publisher","dataset")) %>% -# mutate(key = gsub("https://www.gbif.org/publisher/","",link)) %>% -# mutate(key = gsub("https://www.gbif.org/dataset/","",key)) %>% -# glimpse() - -gbif_country = rgbif::enumeration_country() %>% select(Country=title,iso2) %>% glimpse() - -pp = ss %>% -dplyr::filter(pd == "publisher") %>% -select(key,`Activity sector`) |> -glimpse() -# tail() |> -# 72e23311-b65a-46d0-bc07-ff0a251b47e1 -# rgbif::dataset_search(publishingOrg="72e23311-b65a-46d0-bc07-ff0a251b47e1") -rgbif::organizations(uuid="72e23311-b65a-46d0-bc07-ff0a251b47e1") -# mutate(name = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingOrganization)) %>% -# glimpse() - -# mutate(`Occurrence records` = map_dbl(key,~ rgbif::occ_search(publishingOrg = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -# mutate(Datasets = map_dbl(key,~rgbif::dataset_search(publishingOrg= .x,limit=0)$meta$count)) %>% -# mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(publishingOrg = .x))) %>% -# mutate(Company = paste0("https://www.gbif.org/publisher/",key,"[",name,"]")) %>% -# mutate(iso2 = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -# merge(gbif_country,by="iso2") %>% -# glimpse() - -if(FALSE) { -dd = ss %>% -dplyr::filter(pd == "dataset") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::datasets(uuid=.x,limit=1)$data$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ -rgbif::occ_search(datasetKey = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = 1) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(datasetKey = .x))) %>% -mutate(Company = paste0("(https://www.gbif.org/dataset/",key,")[",name,"]")) %>% -mutate(p_key = map_chr(key,~ rgbif::datasets(uuid=.x,limit=1)$data$publishingOrganizationKey)) %>% -mutate(iso2 = map_chr(p_key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -select(-p_key) %>% -glimpse() - -# combine tables and clean up -tt = rbind(pp,dd) %>% -arrange(name) %>% -select(Company, `Activity sector`, Country, Datasets, `Occurrence records`, `Data citations`) %>% -glimpse() - -# save .adoc -save_file_table = "250-private-sector-table.adoc" - -# save and more cleanup -sink(file = save_file_table, type = "output") -tt %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -# totals table -save_file_totals = "260-private-sector-totals.adoc" - -sink(file = save_file_totals, type = "output") -tt %>% -summarise( -Datasets = sum(Datasets), -`Occurrence records` = sum(`Occurrence records`), -`Data citations` = sum(`Data citations`) -) %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -} \ No newline at end of file diff --git a/.history/build-table-script/R/table_20240704111714.R b/.history/build-table-script/R/table_20240704111714.R deleted file mode 100644 index a20ee08..0000000 --- a/.history/build-table-script/R/table_20240704111714.R +++ /dev/null @@ -1,104 +0,0 @@ - -library(dplyr) -library(purrr) -library(gbifmt) # my library -# setwd("C:/Users/ftw712/Desktop/doc-private-sector-data-publishing/") - -# harvest publishers and datasets private sector publishers directly from machineTags - -# dataset -ds_mt = get_mt("privateSector.gbif.org",type="dataset",limit=500) |> -mutate(link = paste0("https://www.gbif.org/dataset/",uuid)) %>% -mutate(pd = "dataset") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -# publisher -pb_mt = get_mt("privateSector.gbif.org",type="organization",limit=500) |> -mutate(link = paste0("https://www.gbif.org/publisher/",uuid)) %>% -mutate(pd = "publisher") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -ss = list(ds_mt,pb_mt) |> -bind_rows() |> -glimpse() - -# ss = readr::read_csv("build-table-script/data/source.tsv") %>% -# mutate(pd = ifelse(grepl("publisher",link),"publisher","dataset")) %>% -# mutate(key = gsub("https://www.gbif.org/publisher/","",link)) %>% -# mutate(key = gsub("https://www.gbif.org/dataset/","",key)) %>% -# glimpse() - -gbif_country = rgbif::enumeration_country() %>% select(Country=title,iso2) %>% glimpse() - -pp = ss %>% -dplyr::filter(pd == "publisher") %>% -select(key,`Activity sector`) |> -glimpse() -# tail() |> -# 72e23311-b65a-46d0-bc07-ff0a251b47e1 -# rgbif::dataset_search(publishingOrg="72e23311-b65a-46d0-bc07-ff0a251b47e1") -rgbif::organizations(uuid="51818adb-2745-4201-9397-6d6dc433954f") -# mutate(name = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingOrganization)) %>% -# glimpse() - -# mutate(`Occurrence records` = map_dbl(key,~ rgbif::occ_search(publishingOrg = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -# mutate(Datasets = map_dbl(key,~rgbif::dataset_search(publishingOrg= .x,limit=0)$meta$count)) %>% -# mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(publishingOrg = .x))) %>% -# mutate(Company = paste0("https://www.gbif.org/publisher/",key,"[",name,"]")) %>% -# mutate(iso2 = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -# merge(gbif_country,by="iso2") %>% -# glimpse() - -if(FALSE) { -dd = ss %>% -dplyr::filter(pd == "dataset") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::datasets(uuid=.x,limit=1)$data$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ -rgbif::occ_search(datasetKey = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = 1) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(datasetKey = .x))) %>% -mutate(Company = paste0("(https://www.gbif.org/dataset/",key,")[",name,"]")) %>% -mutate(p_key = map_chr(key,~ rgbif::datasets(uuid=.x,limit=1)$data$publishingOrganizationKey)) %>% -mutate(iso2 = map_chr(p_key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -select(-p_key) %>% -glimpse() - -# combine tables and clean up -tt = rbind(pp,dd) %>% -arrange(name) %>% -select(Company, `Activity sector`, Country, Datasets, `Occurrence records`, `Data citations`) %>% -glimpse() - -# save .adoc -save_file_table = "250-private-sector-table.adoc" - -# save and more cleanup -sink(file = save_file_table, type = "output") -tt %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -# totals table -save_file_totals = "260-private-sector-totals.adoc" - -sink(file = save_file_totals, type = "output") -tt %>% -summarise( -Datasets = sum(Datasets), -`Occurrence records` = sum(`Occurrence records`), -`Data citations` = sum(`Data citations`) -) %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -} \ No newline at end of file diff --git a/.history/build-table-script/R/table_20240704111733.R b/.history/build-table-script/R/table_20240704111733.R deleted file mode 100644 index 08a40e2..0000000 --- a/.history/build-table-script/R/table_20240704111733.R +++ /dev/null @@ -1,104 +0,0 @@ - -library(dplyr) -library(purrr) -library(gbifmt) # my library -# setwd("C:/Users/ftw712/Desktop/doc-private-sector-data-publishing/") - -# harvest publishers and datasets private sector publishers directly from machineTags - -# dataset -ds_mt = get_mt("privateSector.gbif.org",type="dataset",limit=500) |> -mutate(link = paste0("https://www.gbif.org/dataset/",uuid)) %>% -mutate(pd = "dataset") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -# publisher -pb_mt = get_mt("privateSector.gbif.org",type="organization",limit=500) |> -mutate(link = paste0("https://www.gbif.org/publisher/",uuid)) %>% -mutate(pd = "publisher") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -ss = list(ds_mt,pb_mt) |> -bind_rows() |> -glimpse() - -# ss = readr::read_csv("build-table-script/data/source.tsv") %>% -# mutate(pd = ifelse(grepl("publisher",link),"publisher","dataset")) %>% -# mutate(key = gsub("https://www.gbif.org/publisher/","",link)) %>% -# mutate(key = gsub("https://www.gbif.org/dataset/","",key)) %>% -# glimpse() - -gbif_country = rgbif::enumeration_country() %>% select(Country=title,iso2) %>% glimpse() - -pp = ss %>% -dplyr::filter(pd == "publisher") %>% -select(key,`Activity sector`) |> -glimpse() -# tail() |> -# 72e23311-b65a-46d0-bc07-ff0a251b47e1 -# rgbif::dataset_search(publishingOrg="72e23311-b65a-46d0-bc07-ff0a251b47e1") -rgbif::organizations(uuid="51818adb-2745-4201-9397-6d6dc433954f")$data -# mutate(name = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingOrganization)) %>% -# glimpse() - -# mutate(`Occurrence records` = map_dbl(key,~ rgbif::occ_search(publishingOrg = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -# mutate(Datasets = map_dbl(key,~rgbif::dataset_search(publishingOrg= .x,limit=0)$meta$count)) %>% -# mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(publishingOrg = .x))) %>% -# mutate(Company = paste0("https://www.gbif.org/publisher/",key,"[",name,"]")) %>% -# mutate(iso2 = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -# merge(gbif_country,by="iso2") %>% -# glimpse() - -if(FALSE) { -dd = ss %>% -dplyr::filter(pd == "dataset") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::datasets(uuid=.x,limit=1)$data$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ -rgbif::occ_search(datasetKey = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = 1) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(datasetKey = .x))) %>% -mutate(Company = paste0("(https://www.gbif.org/dataset/",key,")[",name,"]")) %>% -mutate(p_key = map_chr(key,~ rgbif::datasets(uuid=.x,limit=1)$data$publishingOrganizationKey)) %>% -mutate(iso2 = map_chr(p_key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -select(-p_key) %>% -glimpse() - -# combine tables and clean up -tt = rbind(pp,dd) %>% -arrange(name) %>% -select(Company, `Activity sector`, Country, Datasets, `Occurrence records`, `Data citations`) %>% -glimpse() - -# save .adoc -save_file_table = "250-private-sector-table.adoc" - -# save and more cleanup -sink(file = save_file_table, type = "output") -tt %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -# totals table -save_file_totals = "260-private-sector-totals.adoc" - -sink(file = save_file_totals, type = "output") -tt %>% -summarise( -Datasets = sum(Datasets), -`Occurrence records` = sum(`Occurrence records`), -`Data citations` = sum(`Data citations`) -) %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -} \ No newline at end of file diff --git a/.history/build-table-script/R/table_20240704111811.R b/.history/build-table-script/R/table_20240704111811.R deleted file mode 100644 index e5d0c7e..0000000 --- a/.history/build-table-script/R/table_20240704111811.R +++ /dev/null @@ -1,104 +0,0 @@ - -library(dplyr) -library(purrr) -library(gbifmt) # my library -# setwd("C:/Users/ftw712/Desktop/doc-private-sector-data-publishing/") - -# harvest publishers and datasets private sector publishers directly from machineTags - -# dataset -ds_mt = get_mt("privateSector.gbif.org",type="dataset",limit=500) |> -mutate(link = paste0("https://www.gbif.org/dataset/",uuid)) %>% -mutate(pd = "dataset") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -# publisher -pb_mt = get_mt("privateSector.gbif.org",type="organization",limit=500) |> -mutate(link = paste0("https://www.gbif.org/publisher/",uuid)) %>% -mutate(pd = "publisher") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -ss = list(ds_mt,pb_mt) |> -bind_rows() |> -glimpse() - -# ss = readr::read_csv("build-table-script/data/source.tsv") %>% -# mutate(pd = ifelse(grepl("publisher",link),"publisher","dataset")) %>% -# mutate(key = gsub("https://www.gbif.org/publisher/","",link)) %>% -# mutate(key = gsub("https://www.gbif.org/dataset/","",key)) %>% -# glimpse() - -gbif_country = rgbif::enumeration_country() %>% select(Country=title,iso2) %>% glimpse() - -pp = ss %>% -dplyr::filter(pd == "publisher") %>% -select(key,`Activity sector`) |> -glimpse() -# tail() |> -# 72e23311-b65a-46d0-bc07-ff0a251b47e1 -# rgbif::dataset_search(publishingOrg="72e23311-b65a-46d0-bc07-ff0a251b47e1") -rgbif::organizations(uuid="51818adb-2745-4201-9397-6d6dc433954f")$data$title -# mutate(name = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingOrganization)) %>% -# glimpse() - -# mutate(`Occurrence records` = map_dbl(key,~ rgbif::occ_search(publishingOrg = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -# mutate(Datasets = map_dbl(key,~rgbif::dataset_search(publishingOrg= .x,limit=0)$meta$count)) %>% -# mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(publishingOrg = .x))) %>% -# mutate(Company = paste0("https://www.gbif.org/publisher/",key,"[",name,"]")) %>% -# mutate(iso2 = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -# merge(gbif_country,by="iso2") %>% -# glimpse() - -if(FALSE) { -dd = ss %>% -dplyr::filter(pd == "dataset") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::datasets(uuid=.x,limit=1)$data$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ -rgbif::occ_search(datasetKey = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = 1) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(datasetKey = .x))) %>% -mutate(Company = paste0("(https://www.gbif.org/dataset/",key,")[",name,"]")) %>% -mutate(p_key = map_chr(key,~ rgbif::datasets(uuid=.x,limit=1)$data$publishingOrganizationKey)) %>% -mutate(iso2 = map_chr(p_key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -select(-p_key) %>% -glimpse() - -# combine tables and clean up -tt = rbind(pp,dd) %>% -arrange(name) %>% -select(Company, `Activity sector`, Country, Datasets, `Occurrence records`, `Data citations`) %>% -glimpse() - -# save .adoc -save_file_table = "250-private-sector-table.adoc" - -# save and more cleanup -sink(file = save_file_table, type = "output") -tt %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -# totals table -save_file_totals = "260-private-sector-totals.adoc" - -sink(file = save_file_totals, type = "output") -tt %>% -summarise( -Datasets = sum(Datasets), -`Occurrence records` = sum(`Occurrence records`), -`Data citations` = sum(`Data citations`) -) %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -} \ No newline at end of file diff --git a/.history/build-table-script/R/table_20240704111831.R b/.history/build-table-script/R/table_20240704111831.R deleted file mode 100644 index a63a374..0000000 --- a/.history/build-table-script/R/table_20240704111831.R +++ /dev/null @@ -1,104 +0,0 @@ - -library(dplyr) -library(purrr) -library(gbifmt) # my library -# setwd("C:/Users/ftw712/Desktop/doc-private-sector-data-publishing/") - -# harvest publishers and datasets private sector publishers directly from machineTags - -# dataset -ds_mt = get_mt("privateSector.gbif.org",type="dataset",limit=500) |> -mutate(link = paste0("https://www.gbif.org/dataset/",uuid)) %>% -mutate(pd = "dataset") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -# publisher -pb_mt = get_mt("privateSector.gbif.org",type="organization",limit=500) |> -mutate(link = paste0("https://www.gbif.org/publisher/",uuid)) %>% -mutate(pd = "publisher") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -ss = list(ds_mt,pb_mt) |> -bind_rows() |> -glimpse() - -# ss = readr::read_csv("build-table-script/data/source.tsv") %>% -# mutate(pd = ifelse(grepl("publisher",link),"publisher","dataset")) %>% -# mutate(key = gsub("https://www.gbif.org/publisher/","",link)) %>% -# mutate(key = gsub("https://www.gbif.org/dataset/","",key)) %>% -# glimpse() - -gbif_country = rgbif::enumeration_country() %>% select(Country=title,iso2) %>% glimpse() - -pp = ss %>% -dplyr::filter(pd == "publisher") %>% -select(key,`Activity sector`) |> -glimpse() -# tail() |> -# 72e23311-b65a-46d0-bc07-ff0a251b47e1 -# rgbif::dataset_search(publishingOrg="72e23311-b65a-46d0-bc07-ff0a251b47e1") -mutate(rgbif::organizations(uuid="51818adb-2745-4201-9397-6d6dc433954f",limit=1)$data$title -# mutate(name = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingOrganization)) %>% -# glimpse() - -# mutate(`Occurrence records` = map_dbl(key,~ rgbif::occ_search(publishingOrg = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -# mutate(Datasets = map_dbl(key,~rgbif::dataset_search(publishingOrg= .x,limit=0)$meta$count)) %>% -# mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(publishingOrg = .x))) %>% -# mutate(Company = paste0("https://www.gbif.org/publisher/",key,"[",name,"]")) %>% -# mutate(iso2 = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -# merge(gbif_country,by="iso2") %>% -# glimpse() - -if(FALSE) { -dd = ss %>% -dplyr::filter(pd == "dataset") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::datasets(uuid=.x,limit=1)$data$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ -rgbif::occ_search(datasetKey = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = 1) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(datasetKey = .x))) %>% -mutate(Company = paste0("(https://www.gbif.org/dataset/",key,")[",name,"]")) %>% -mutate(p_key = map_chr(key,~ rgbif::datasets(uuid=.x,limit=1)$data$publishingOrganizationKey)) %>% -mutate(iso2 = map_chr(p_key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -select(-p_key) %>% -glimpse() - -# combine tables and clean up -tt = rbind(pp,dd) %>% -arrange(name) %>% -select(Company, `Activity sector`, Country, Datasets, `Occurrence records`, `Data citations`) %>% -glimpse() - -# save .adoc -save_file_table = "250-private-sector-table.adoc" - -# save and more cleanup -sink(file = save_file_table, type = "output") -tt %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -# totals table -save_file_totals = "260-private-sector-totals.adoc" - -sink(file = save_file_totals, type = "output") -tt %>% -summarise( -Datasets = sum(Datasets), -`Occurrence records` = sum(`Occurrence records`), -`Data citations` = sum(`Data citations`) -) %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -} \ No newline at end of file diff --git a/.history/build-table-script/R/table_20240704111841.R b/.history/build-table-script/R/table_20240704111841.R deleted file mode 100644 index 00dafc8..0000000 --- a/.history/build-table-script/R/table_20240704111841.R +++ /dev/null @@ -1,104 +0,0 @@ - -library(dplyr) -library(purrr) -library(gbifmt) # my library -# setwd("C:/Users/ftw712/Desktop/doc-private-sector-data-publishing/") - -# harvest publishers and datasets private sector publishers directly from machineTags - -# dataset -ds_mt = get_mt("privateSector.gbif.org",type="dataset",limit=500) |> -mutate(link = paste0("https://www.gbif.org/dataset/",uuid)) %>% -mutate(pd = "dataset") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -# publisher -pb_mt = get_mt("privateSector.gbif.org",type="organization",limit=500) |> -mutate(link = paste0("https://www.gbif.org/publisher/",uuid)) %>% -mutate(pd = "publisher") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -ss = list(ds_mt,pb_mt) |> -bind_rows() |> -glimpse() - -# ss = readr::read_csv("build-table-script/data/source.tsv") %>% -# mutate(pd = ifelse(grepl("publisher",link),"publisher","dataset")) %>% -# mutate(key = gsub("https://www.gbif.org/publisher/","",link)) %>% -# mutate(key = gsub("https://www.gbif.org/dataset/","",key)) %>% -# glimpse() - -gbif_country = rgbif::enumeration_country() %>% select(Country=title,iso2) %>% glimpse() - -pp = ss %>% -dplyr::filter(pd == "publisher") %>% -select(key,`Activity sector`) |> -glimpse() -# tail() |> -# 72e23311-b65a-46d0-bc07-ff0a251b47e1 -# rgbif::dataset_search(publishingOrg="72e23311-b65a-46d0-bc07-ff0a251b47e1") -mutate(rgbif::organizations(uuid=.x,limit=1)$data$title -# mutate(name = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingOrganization)) %>% -# glimpse() - -# mutate(`Occurrence records` = map_dbl(key,~ rgbif::occ_search(publishingOrg = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -# mutate(Datasets = map_dbl(key,~rgbif::dataset_search(publishingOrg= .x,limit=0)$meta$count)) %>% -# mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(publishingOrg = .x))) %>% -# mutate(Company = paste0("https://www.gbif.org/publisher/",key,"[",name,"]")) %>% -# mutate(iso2 = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -# merge(gbif_country,by="iso2") %>% -# glimpse() - -if(FALSE) { -dd = ss %>% -dplyr::filter(pd == "dataset") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::datasets(uuid=.x,limit=1)$data$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ -rgbif::occ_search(datasetKey = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = 1) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(datasetKey = .x))) %>% -mutate(Company = paste0("(https://www.gbif.org/dataset/",key,")[",name,"]")) %>% -mutate(p_key = map_chr(key,~ rgbif::datasets(uuid=.x,limit=1)$data$publishingOrganizationKey)) %>% -mutate(iso2 = map_chr(p_key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -select(-p_key) %>% -glimpse() - -# combine tables and clean up -tt = rbind(pp,dd) %>% -arrange(name) %>% -select(Company, `Activity sector`, Country, Datasets, `Occurrence records`, `Data citations`) %>% -glimpse() - -# save .adoc -save_file_table = "250-private-sector-table.adoc" - -# save and more cleanup -sink(file = save_file_table, type = "output") -tt %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -# totals table -save_file_totals = "260-private-sector-totals.adoc" - -sink(file = save_file_totals, type = "output") -tt %>% -summarise( -Datasets = sum(Datasets), -`Occurrence records` = sum(`Occurrence records`), -`Data citations` = sum(`Data citations`) -) %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -} \ No newline at end of file diff --git a/.history/build-table-script/R/table_20240704111913.R b/.history/build-table-script/R/table_20240704111913.R deleted file mode 100644 index 81910f4..0000000 --- a/.history/build-table-script/R/table_20240704111913.R +++ /dev/null @@ -1,104 +0,0 @@ - -library(dplyr) -library(purrr) -library(gbifmt) # my library -# setwd("C:/Users/ftw712/Desktop/doc-private-sector-data-publishing/") - -# harvest publishers and datasets private sector publishers directly from machineTags - -# dataset -ds_mt = get_mt("privateSector.gbif.org",type="dataset",limit=500) |> -mutate(link = paste0("https://www.gbif.org/dataset/",uuid)) %>% -mutate(pd = "dataset") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -# publisher -pb_mt = get_mt("privateSector.gbif.org",type="organization",limit=500) |> -mutate(link = paste0("https://www.gbif.org/publisher/",uuid)) %>% -mutate(pd = "publisher") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -ss = list(ds_mt,pb_mt) |> -bind_rows() |> -glimpse() - -# ss = readr::read_csv("build-table-script/data/source.tsv") %>% -# mutate(pd = ifelse(grepl("publisher",link),"publisher","dataset")) %>% -# mutate(key = gsub("https://www.gbif.org/publisher/","",link)) %>% -# mutate(key = gsub("https://www.gbif.org/dataset/","",key)) %>% -# glimpse() - -gbif_country = rgbif::enumeration_country() %>% select(Country=title,iso2) %>% glimpse() - -pp = ss %>% -dplyr::filter(pd == "publisher") %>% -select(key,`Activity sector`) |> -glimpse() -# tail() |> -# 72e23311-b65a-46d0-bc07-ff0a251b47e1 -# rgbif::dataset_search(publishingOrg="72e23311-b65a-46d0-bc07-ff0a251b47e1") -mutate(name = map_chr(key,~rgbif::organizations(uuid=.x,limit=1)$data$title)) -# mutate(name = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingOrganization)) %>% -# glimpse() - -# mutate(`Occurrence records` = map_dbl(key,~ rgbif::occ_search(publishingOrg = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -# mutate(Datasets = map_dbl(key,~rgbif::dataset_search(publishingOrg= .x,limit=0)$meta$count)) %>% -# mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(publishingOrg = .x))) %>% -# mutate(Company = paste0("https://www.gbif.org/publisher/",key,"[",name,"]")) %>% -# mutate(iso2 = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -# merge(gbif_country,by="iso2") %>% -# glimpse() - -if(FALSE) { -dd = ss %>% -dplyr::filter(pd == "dataset") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::datasets(uuid=.x,limit=1)$data$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ -rgbif::occ_search(datasetKey = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = 1) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(datasetKey = .x))) %>% -mutate(Company = paste0("(https://www.gbif.org/dataset/",key,")[",name,"]")) %>% -mutate(p_key = map_chr(key,~ rgbif::datasets(uuid=.x,limit=1)$data$publishingOrganizationKey)) %>% -mutate(iso2 = map_chr(p_key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -select(-p_key) %>% -glimpse() - -# combine tables and clean up -tt = rbind(pp,dd) %>% -arrange(name) %>% -select(Company, `Activity sector`, Country, Datasets, `Occurrence records`, `Data citations`) %>% -glimpse() - -# save .adoc -save_file_table = "250-private-sector-table.adoc" - -# save and more cleanup -sink(file = save_file_table, type = "output") -tt %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -# totals table -save_file_totals = "260-private-sector-totals.adoc" - -sink(file = save_file_totals, type = "output") -tt %>% -summarise( -Datasets = sum(Datasets), -`Occurrence records` = sum(`Occurrence records`), -`Data citations` = sum(`Data citations`) -) %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -} \ No newline at end of file diff --git a/.history/build-table-script/R/table_20240704111920.R b/.history/build-table-script/R/table_20240704111920.R deleted file mode 100644 index 0dfa7d7..0000000 --- a/.history/build-table-script/R/table_20240704111920.R +++ /dev/null @@ -1,104 +0,0 @@ - -library(dplyr) -library(purrr) -library(gbifmt) # my library -# setwd("C:/Users/ftw712/Desktop/doc-private-sector-data-publishing/") - -# harvest publishers and datasets private sector publishers directly from machineTags - -# dataset -ds_mt = get_mt("privateSector.gbif.org",type="dataset",limit=500) |> -mutate(link = paste0("https://www.gbif.org/dataset/",uuid)) %>% -mutate(pd = "dataset") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -# publisher -pb_mt = get_mt("privateSector.gbif.org",type="organization",limit=500) |> -mutate(link = paste0("https://www.gbif.org/publisher/",uuid)) %>% -mutate(pd = "publisher") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -ss = list(ds_mt,pb_mt) |> -bind_rows() |> -glimpse() - -# ss = readr::read_csv("build-table-script/data/source.tsv") %>% -# mutate(pd = ifelse(grepl("publisher",link),"publisher","dataset")) %>% -# mutate(key = gsub("https://www.gbif.org/publisher/","",link)) %>% -# mutate(key = gsub("https://www.gbif.org/dataset/","",key)) %>% -# glimpse() - -gbif_country = rgbif::enumeration_country() %>% select(Country=title,iso2) %>% glimpse() - -pp = ss %>% -dplyr::filter(pd == "publisher") %>% -select(key,`Activity sector`) |> -mutate(name = map_chr(key,~rgbif::organizations(uuid=.x,limit=1)$data$title)) |> -glimpse() -# tail() |> -# 72e23311-b65a-46d0-bc07-ff0a251b47e1 -# rgbif::dataset_search(publishingOrg="72e23311-b65a-46d0-bc07-ff0a251b47e1") -# mutate(name = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingOrganization)) %>% -# glimpse() - -# mutate(`Occurrence records` = map_dbl(key,~ rgbif::occ_search(publishingOrg = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -# mutate(Datasets = map_dbl(key,~rgbif::dataset_search(publishingOrg= .x,limit=0)$meta$count)) %>% -# mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(publishingOrg = .x))) %>% -# mutate(Company = paste0("https://www.gbif.org/publisher/",key,"[",name,"]")) %>% -# mutate(iso2 = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -# merge(gbif_country,by="iso2") %>% -# glimpse() - -if(FALSE) { -dd = ss %>% -dplyr::filter(pd == "dataset") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::datasets(uuid=.x,limit=1)$data$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ -rgbif::occ_search(datasetKey = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = 1) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(datasetKey = .x))) %>% -mutate(Company = paste0("(https://www.gbif.org/dataset/",key,")[",name,"]")) %>% -mutate(p_key = map_chr(key,~ rgbif::datasets(uuid=.x,limit=1)$data$publishingOrganizationKey)) %>% -mutate(iso2 = map_chr(p_key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -select(-p_key) %>% -glimpse() - -# combine tables and clean up -tt = rbind(pp,dd) %>% -arrange(name) %>% -select(Company, `Activity sector`, Country, Datasets, `Occurrence records`, `Data citations`) %>% -glimpse() - -# save .adoc -save_file_table = "250-private-sector-table.adoc" - -# save and more cleanup -sink(file = save_file_table, type = "output") -tt %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -# totals table -save_file_totals = "260-private-sector-totals.adoc" - -sink(file = save_file_totals, type = "output") -tt %>% -summarise( -Datasets = sum(Datasets), -`Occurrence records` = sum(`Occurrence records`), -`Data citations` = sum(`Data citations`) -) %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -} \ No newline at end of file diff --git a/.history/build-table-script/R/table_20240704112001.R b/.history/build-table-script/R/table_20240704112001.R deleted file mode 100644 index 8d4d074..0000000 --- a/.history/build-table-script/R/table_20240704112001.R +++ /dev/null @@ -1,97 +0,0 @@ - -library(dplyr) -library(purrr) -library(gbifmt) # my library -# setwd("C:/Users/ftw712/Desktop/doc-private-sector-data-publishing/") - -# harvest publishers and datasets private sector publishers directly from machineTags - -# dataset -ds_mt = get_mt("privateSector.gbif.org",type="dataset",limit=500) |> -mutate(link = paste0("https://www.gbif.org/dataset/",uuid)) %>% -mutate(pd = "dataset") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -# publisher -pb_mt = get_mt("privateSector.gbif.org",type="organization",limit=500) |> -mutate(link = paste0("https://www.gbif.org/publisher/",uuid)) %>% -mutate(pd = "publisher") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -ss = list(ds_mt,pb_mt) |> -bind_rows() |> -glimpse() - -# ss = readr::read_csv("build-table-script/data/source.tsv") %>% -# mutate(pd = ifelse(grepl("publisher",link),"publisher","dataset")) %>% -# mutate(key = gsub("https://www.gbif.org/publisher/","",link)) %>% -# mutate(key = gsub("https://www.gbif.org/dataset/","",key)) %>% -# glimpse() - -gbif_country = rgbif::enumeration_country() %>% select(Country=title,iso2) %>% glimpse() - -pp = ss %>% -dplyr::filter(pd == "publisher") %>% -select(key,`Activity sector`) |> -mutate(name = map_chr(key,~rgbif::organizations(uuid=.x,limit=1)$data$title)) |> -mutate(`Occurrence records` = map_dbl(key,~ rgbif::occ_search(publishingOrg = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = map_dbl(key,~rgbif::dataset_search(publishingOrg= .x,limit=0)$meta$count)) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(publishingOrg = .x))) %>% -mutate(Company = paste0("https://www.gbif.org/publisher/",key,"[",name,"]")) %>% -mutate(iso2 = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -glimpse() - -if(FALSE) { -dd = ss %>% -dplyr::filter(pd == "dataset") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::datasets(uuid=.x,limit=1)$data$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ -rgbif::occ_search(datasetKey = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = 1) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(datasetKey = .x))) %>% -mutate(Company = paste0("(https://www.gbif.org/dataset/",key,")[",name,"]")) %>% -mutate(p_key = map_chr(key,~ rgbif::datasets(uuid=.x,limit=1)$data$publishingOrganizationKey)) %>% -mutate(iso2 = map_chr(p_key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -select(-p_key) %>% -glimpse() - -# combine tables and clean up -tt = rbind(pp,dd) %>% -arrange(name) %>% -select(Company, `Activity sector`, Country, Datasets, `Occurrence records`, `Data citations`) %>% -glimpse() - -# save .adoc -save_file_table = "250-private-sector-table.adoc" - -# save and more cleanup -sink(file = save_file_table, type = "output") -tt %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -# totals table -save_file_totals = "260-private-sector-totals.adoc" - -sink(file = save_file_totals, type = "output") -tt %>% -summarise( -Datasets = sum(Datasets), -`Occurrence records` = sum(`Occurrence records`), -`Data citations` = sum(`Data citations`) -) %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -} \ No newline at end of file diff --git a/.history/build-table-script/R/table_20240704112112.R b/.history/build-table-script/R/table_20240704112112.R deleted file mode 100644 index 71285aa..0000000 --- a/.history/build-table-script/R/table_20240704112112.R +++ /dev/null @@ -1,97 +0,0 @@ - -library(dplyr) -library(purrr) -library(gbifmt) # my library -# setwd("C:/Users/ftw712/Desktop/doc-private-sector-data-publishing/") - -# harvest publishers and datasets private sector publishers directly from machineTags - -# dataset -ds_mt = get_mt("privateSector.gbif.org",type="dataset",limit=500) |> -mutate(link = paste0("https://www.gbif.org/dataset/",uuid)) %>% -mutate(pd = "dataset") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -# publisher -pb_mt = get_mt("privateSector.gbif.org",type="organization",limit=500) |> -mutate(link = paste0("https://www.gbif.org/publisher/",uuid)) %>% -mutate(pd = "publisher") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -ss = list(ds_mt,pb_mt) |> -bind_rows() |> -glimpse() - -# ss = readr::read_csv("build-table-script/data/source.tsv") %>% -# mutate(pd = ifelse(grepl("publisher",link),"publisher","dataset")) %>% -# mutate(key = gsub("https://www.gbif.org/publisher/","",link)) %>% -# mutate(key = gsub("https://www.gbif.org/dataset/","",key)) %>% -# glimpse() - -gbif_country = rgbif::enumeration_country() %>% select(Country=title,iso2) %>% glimpse() - -pp = ss %>% -dplyr::filter(pd == "publisher") %>% -select(key,`Activity sector`) |> -mutate(name = map_chr(key,~rgbif::organizations(uuid=.x,limit=1)$data$title)) |> -mutate(`Occurrence records` = map_dbl(key,~ rgbif::occ_search(publishingOrg = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = map_dbl(key,~rgbif::dataset_search(publishingOrg= .x,limit=0)$meta$count)) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(publishingOrg = .x))) %>% -mutate(Company = paste0("https://www.gbif.org/publisher/",key,"[",name,"]")) %>% -mutate(iso2 = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -glimpse() - -dd = ss %>% -dplyr::filter(pd == "dataset") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::datasets(uuid=.x,limit=1)$data$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ -rgbif::occ_search(datasetKey = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = 1) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(datasetKey = .x))) %>% -mutate(Company = paste0("(https://www.gbif.org/dataset/",key,")[",name,"]")) %>% -mutate(p_key = map_chr(key,~ rgbif::datasets(uuid=.x,limit=1)$data$publishingOrganizationKey)) %>% -mutate(iso2 = map_chr(p_key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -select(-p_key) %>% -glimpse() - -if(FALSE) { -# combine tables and clean up -tt = rbind(pp,dd) %>% -arrange(name) %>% -select(Company, `Activity sector`, Country, Datasets, `Occurrence records`, `Data citations`) %>% -glimpse() - -# save .adoc -save_file_table = "250-private-sector-table.adoc" - -# save and more cleanup -sink(file = save_file_table, type = "output") -tt %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -# totals table -save_file_totals = "260-private-sector-totals.adoc" - -sink(file = save_file_totals, type = "output") -tt %>% -summarise( -Datasets = sum(Datasets), -`Occurrence records` = sum(`Occurrence records`), -`Data citations` = sum(`Data citations`) -) %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -} \ No newline at end of file diff --git a/.history/build-table-script/R/table_20240704112238.R b/.history/build-table-script/R/table_20240704112238.R deleted file mode 100644 index 4a5e2b5..0000000 --- a/.history/build-table-script/R/table_20240704112238.R +++ /dev/null @@ -1,97 +0,0 @@ - -library(dplyr) -library(purrr) -library(gbifmt) # my library -# setwd("C:/Users/ftw712/Desktop/doc-private-sector-data-publishing/") - -# harvest publishers and datasets private sector publishers directly from machineTags - -# dataset -ds_mt = get_mt("privateSector.gbif.org",type="dataset",limit=500) |> -mutate(link = paste0("https://www.gbif.org/dataset/",uuid)) %>% -mutate(pd = "dataset") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -# publisher -pb_mt = get_mt("privateSector.gbif.org",type="organization",limit=500) |> -mutate(link = paste0("https://www.gbif.org/publisher/",uuid)) %>% -mutate(pd = "publisher") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -ss = list(ds_mt,pb_mt) |> -bind_rows() |> -glimpse() - -# ss = readr::read_csv("build-table-script/data/source.tsv") %>% -# mutate(pd = ifelse(grepl("publisher",link),"publisher","dataset")) %>% -# mutate(key = gsub("https://www.gbif.org/publisher/","",link)) %>% -# mutate(key = gsub("https://www.gbif.org/dataset/","",key)) %>% -# glimpse() - -gbif_country = rgbif::enumeration_country() %>% select(Country=title,iso2) %>% glimpse() - -pp = ss %>% -dplyr::filter(pd == "publisher") %>% -select(key,`Activity sector`) |> -mutate(name = map_chr(key,~rgbif::organizations(uuid=.x,limit=1)$data$title)) |> -mutate(`Occurrence records` = map_dbl(key,~ rgbif::occ_search(publishingOrg = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = map_dbl(key,~rgbif::dataset_search(publishingOrg= .x,limit=0)$meta$count)) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(publishingOrg = .x))) %>% -mutate(Company = paste0("https://www.gbif.org/publisher/",key,"[",name,"]")) %>% -mutate(iso2 = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -glimpse() - -dd = ss %>% -dplyr::filter(pd == "dataset") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::dataset(uuid=.x,limit=1)$data$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ -rgbif::occ_search(datasetKey = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = 1) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(datasetKey = .x))) %>% -mutate(Company = paste0("(https://www.gbif.org/dataset/",key,")[",name,"]")) %>% -mutate(p_key = map_chr(key,~ rgbif::datasets(uuid=.x,limit=1)$data$publishingOrganizationKey)) %>% -mutate(iso2 = map_chr(p_key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -select(-p_key) %>% -glimpse() - -if(FALSE) { -# combine tables and clean up -tt = rbind(pp,dd) %>% -arrange(name) %>% -select(Company, `Activity sector`, Country, Datasets, `Occurrence records`, `Data citations`) %>% -glimpse() - -# save .adoc -save_file_table = "250-private-sector-table.adoc" - -# save and more cleanup -sink(file = save_file_table, type = "output") -tt %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -# totals table -save_file_totals = "260-private-sector-totals.adoc" - -sink(file = save_file_totals, type = "output") -tt %>% -summarise( -Datasets = sum(Datasets), -`Occurrence records` = sum(`Occurrence records`), -`Data citations` = sum(`Data citations`) -) %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -} \ No newline at end of file diff --git a/.history/build-table-script/R/table_20240704112450.R b/.history/build-table-script/R/table_20240704112450.R deleted file mode 100644 index 08d26bf..0000000 --- a/.history/build-table-script/R/table_20240704112450.R +++ /dev/null @@ -1,97 +0,0 @@ - -library(dplyr) -library(purrr) -library(gbifmt) # my library -# setwd("C:/Users/ftw712/Desktop/doc-private-sector-data-publishing/") - -# harvest publishers and datasets private sector publishers directly from machineTags - -# dataset -ds_mt = get_mt("privateSector.gbif.org",type="dataset",limit=500) |> -mutate(link = paste0("https://www.gbif.org/dataset/",uuid)) %>% -mutate(pd = "dataset") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -# publisher -pb_mt = get_mt("privateSector.gbif.org",type="organization",limit=500) |> -mutate(link = paste0("https://www.gbif.org/publisher/",uuid)) %>% -mutate(pd = "publisher") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -ss = list(ds_mt,pb_mt) |> -bind_rows() |> -glimpse() - -# ss = readr::read_csv("build-table-script/data/source.tsv") %>% -# mutate(pd = ifelse(grepl("publisher",link),"publisher","dataset")) %>% -# mutate(key = gsub("https://www.gbif.org/publisher/","",link)) %>% -# mutate(key = gsub("https://www.gbif.org/dataset/","",key)) %>% -# glimpse() - -gbif_country = rgbif::enumeration_country() %>% select(Country=title,iso2) %>% glimpse() - -pp = ss %>% -dplyr::filter(pd == "publisher") %>% -select(key,`Activity sector`) |> -mutate(name = map_chr(key,~rgbif::organizations(uuid=.x,limit=1)$data$title)) |> -mutate(`Occurrence records` = map_dbl(key,~ rgbif::occ_search(publishingOrg = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = map_dbl(key,~rgbif::dataset_search(publishingOrg= .x,limit=0)$meta$count)) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(publishingOrg = .x))) %>% -mutate(Company = paste0("https://www.gbif.org/publisher/",key,"[",name,"]")) %>% -mutate(iso2 = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -glimpse() - -dd = ss %>% -dplyr::filter(pd == "dataset") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::dataset_get(uuid=.x,limit=1)$data$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ -rgbif::occ_search(datasetKey = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = 1) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(datasetKey = .x))) %>% -mutate(Company = paste0("(https://www.gbif.org/dataset/",key,")[",name,"]")) %>% -mutate(p_key = map_chr(key,~ rgbif::datasets(uuid=.x,limit=1)$data$publishingOrganizationKey)) %>% -mutate(iso2 = map_chr(p_key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -select(-p_key) %>% -glimpse() - -if(FALSE) { -# combine tables and clean up -tt = rbind(pp,dd) %>% -arrange(name) %>% -select(Company, `Activity sector`, Country, Datasets, `Occurrence records`, `Data citations`) %>% -glimpse() - -# save .adoc -save_file_table = "250-private-sector-table.adoc" - -# save and more cleanup -sink(file = save_file_table, type = "output") -tt %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -# totals table -save_file_totals = "260-private-sector-totals.adoc" - -sink(file = save_file_totals, type = "output") -tt %>% -summarise( -Datasets = sum(Datasets), -`Occurrence records` = sum(`Occurrence records`), -`Data citations` = sum(`Data citations`) -) %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -} \ No newline at end of file diff --git a/.history/build-table-script/R/table_20240704112520.R b/.history/build-table-script/R/table_20240704112520.R deleted file mode 100644 index 14dcbf0..0000000 --- a/.history/build-table-script/R/table_20240704112520.R +++ /dev/null @@ -1,97 +0,0 @@ - -library(dplyr) -library(purrr) -library(gbifmt) # my library -# setwd("C:/Users/ftw712/Desktop/doc-private-sector-data-publishing/") - -# harvest publishers and datasets private sector publishers directly from machineTags - -# dataset -ds_mt = get_mt("privateSector.gbif.org",type="dataset",limit=500) |> -mutate(link = paste0("https://www.gbif.org/dataset/",uuid)) %>% -mutate(pd = "dataset") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -# publisher -pb_mt = get_mt("privateSector.gbif.org",type="organization",limit=500) |> -mutate(link = paste0("https://www.gbif.org/publisher/",uuid)) %>% -mutate(pd = "publisher") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -ss = list(ds_mt,pb_mt) |> -bind_rows() |> -glimpse() - -# ss = readr::read_csv("build-table-script/data/source.tsv") %>% -# mutate(pd = ifelse(grepl("publisher",link),"publisher","dataset")) %>% -# mutate(key = gsub("https://www.gbif.org/publisher/","",link)) %>% -# mutate(key = gsub("https://www.gbif.org/dataset/","",key)) %>% -# glimpse() - -gbif_country = rgbif::enumeration_country() %>% select(Country=title,iso2) %>% glimpse() - -pp = ss %>% -dplyr::filter(pd == "publisher") %>% -select(key,`Activity sector`) |> -mutate(name = map_chr(key,~rgbif::organizations(uuid=.x,limit=1)$data$title)) |> -mutate(`Occurrence records` = map_dbl(key,~ rgbif::occ_search(publishingOrg = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = map_dbl(key,~rgbif::dataset_search(publishingOrg= .x,limit=0)$meta$count)) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(publishingOrg = .x))) %>% -mutate(Company = paste0("https://www.gbif.org/publisher/",key,"[",name,"]")) %>% -mutate(iso2 = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -glimpse() - -dd = ss %>% -dplyr::filter(pd == "dataset") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::dataset_get(uuid=.x,limit=1)$data$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ -rgbif::occ_search(datasetKey = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = 1) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(datasetKey = .x))) %>% -mutate(Company = paste0("(https://www.gbif.org/dataset/",key,")[",name,"]")) %>% -mutate(p_key = map_chr(key,~ rgbif::dataset_get(uuid=.x,limit=1)$data$publishingOrganizationKey)) %>% -mutate(iso2 = map_chr(p_key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -select(-p_key) %>% -glimpse() - -if(FALSE) { -# combine tables and clean up -tt = rbind(pp,dd) %>% -arrange(name) %>% -select(Company, `Activity sector`, Country, Datasets, `Occurrence records`, `Data citations`) %>% -glimpse() - -# save .adoc -save_file_table = "250-private-sector-table.adoc" - -# save and more cleanup -sink(file = save_file_table, type = "output") -tt %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -# totals table -save_file_totals = "260-private-sector-totals.adoc" - -sink(file = save_file_totals, type = "output") -tt %>% -summarise( -Datasets = sum(Datasets), -`Occurrence records` = sum(`Occurrence records`), -`Data citations` = sum(`Data citations`) -) %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -} \ No newline at end of file diff --git a/.history/build-table-script/R/table_20240704112833.R b/.history/build-table-script/R/table_20240704112833.R deleted file mode 100644 index 1bca1f9..0000000 --- a/.history/build-table-script/R/table_20240704112833.R +++ /dev/null @@ -1,97 +0,0 @@ - -library(dplyr) -library(purrr) -library(gbifmt) # my library -# setwd("C:/Users/ftw712/Desktop/doc-private-sector-data-publishing/") - -# harvest publishers and datasets private sector publishers directly from machineTags - -# dataset -ds_mt = get_mt("privateSector.gbif.org",type="dataset",limit=500) |> -mutate(link = paste0("https://www.gbif.org/dataset/",uuid)) %>% -mutate(pd = "dataset") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -# publisher -pb_mt = get_mt("privateSector.gbif.org",type="organization",limit=500) |> -mutate(link = paste0("https://www.gbif.org/publisher/",uuid)) %>% -mutate(pd = "publisher") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -ss = list(ds_mt,pb_mt) |> -bind_rows() |> -glimpse() - -# ss = readr::read_csv("build-table-script/data/source.tsv") %>% -# mutate(pd = ifelse(grepl("publisher",link),"publisher","dataset")) %>% -# mutate(key = gsub("https://www.gbif.org/publisher/","",link)) %>% -# mutate(key = gsub("https://www.gbif.org/dataset/","",key)) %>% -# glimpse() - -gbif_country = rgbif::enumeration_country() %>% select(Country=title,iso2) %>% glimpse() - -pp = ss %>% -dplyr::filter(pd == "publisher") %>% -select(key,`Activity sector`) |> -mutate(name = map_chr(key,~rgbif::organizations(uuid=.x,limit=1)$data$title)) |> -mutate(`Occurrence records` = map_dbl(key,~ rgbif::occ_search(publishingOrg = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = map_dbl(key,~rgbif::dataset_search(publishingOrg= .x,limit=0)$meta$count)) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(publishingOrg = .x))) %>% -mutate(Company = paste0("https://www.gbif.org/publisher/",key,"[",name,"]")) %>% -mutate(iso2 = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -glimpse() - -dd = ss %>% -dplyr::filter(pd == "dataset") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::dataset_get(uuid=.x,limit=1)$data$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ -rgbif::occ_search(datasetKey = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = 1) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(datasetKey = .x))) %>% -mutate(Company = paste0("(https://www.gbif.org/dataset/",key,")[",name,"]")) %>% -mutate(p_key = map_chr(key,~ rgbif::dataset_get(uuid=.x)$data$publishingOrganizationKey)) %>% -mutate(iso2 = map_chr(p_key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -select(-p_key) %>% -glimpse() - -if(FALSE) { -# combine tables and clean up -tt = rbind(pp,dd) %>% -arrange(name) %>% -select(Company, `Activity sector`, Country, Datasets, `Occurrence records`, `Data citations`) %>% -glimpse() - -# save .adoc -save_file_table = "250-private-sector-table.adoc" - -# save and more cleanup -sink(file = save_file_table, type = "output") -tt %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -# totals table -save_file_totals = "260-private-sector-totals.adoc" - -sink(file = save_file_totals, type = "output") -tt %>% -summarise( -Datasets = sum(Datasets), -`Occurrence records` = sum(`Occurrence records`), -`Data citations` = sum(`Data citations`) -) %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -} \ No newline at end of file diff --git a/.history/build-table-script/R/table_20240704112837.R b/.history/build-table-script/R/table_20240704112837.R deleted file mode 100644 index 4008277..0000000 --- a/.history/build-table-script/R/table_20240704112837.R +++ /dev/null @@ -1,97 +0,0 @@ - -library(dplyr) -library(purrr) -library(gbifmt) # my library -# setwd("C:/Users/ftw712/Desktop/doc-private-sector-data-publishing/") - -# harvest publishers and datasets private sector publishers directly from machineTags - -# dataset -ds_mt = get_mt("privateSector.gbif.org",type="dataset",limit=500) |> -mutate(link = paste0("https://www.gbif.org/dataset/",uuid)) %>% -mutate(pd = "dataset") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -# publisher -pb_mt = get_mt("privateSector.gbif.org",type="organization",limit=500) |> -mutate(link = paste0("https://www.gbif.org/publisher/",uuid)) %>% -mutate(pd = "publisher") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -ss = list(ds_mt,pb_mt) |> -bind_rows() |> -glimpse() - -# ss = readr::read_csv("build-table-script/data/source.tsv") %>% -# mutate(pd = ifelse(grepl("publisher",link),"publisher","dataset")) %>% -# mutate(key = gsub("https://www.gbif.org/publisher/","",link)) %>% -# mutate(key = gsub("https://www.gbif.org/dataset/","",key)) %>% -# glimpse() - -gbif_country = rgbif::enumeration_country() %>% select(Country=title,iso2) %>% glimpse() - -pp = ss %>% -dplyr::filter(pd == "publisher") %>% -select(key,`Activity sector`) |> -mutate(name = map_chr(key,~rgbif::organizations(uuid=.x,limit=1)$data$title)) |> -mutate(`Occurrence records` = map_dbl(key,~ rgbif::occ_search(publishingOrg = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = map_dbl(key,~rgbif::dataset_search(publishingOrg= .x,limit=0)$meta$count)) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(publishingOrg = .x))) %>% -mutate(Company = paste0("https://www.gbif.org/publisher/",key,"[",name,"]")) %>% -mutate(iso2 = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -glimpse() - -dd = ss %>% -dplyr::filter(pd == "dataset") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::dataset_get(uuid=.x)$data$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ -rgbif::occ_search(datasetKey = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = 1) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(datasetKey = .x))) %>% -mutate(Company = paste0("(https://www.gbif.org/dataset/",key,")[",name,"]")) %>% -mutate(p_key = map_chr(key,~ rgbif::dataset_get(uuid=.x)$data$publishingOrganizationKey)) %>% -mutate(iso2 = map_chr(p_key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -select(-p_key) %>% -glimpse() - -if(FALSE) { -# combine tables and clean up -tt = rbind(pp,dd) %>% -arrange(name) %>% -select(Company, `Activity sector`, Country, Datasets, `Occurrence records`, `Data citations`) %>% -glimpse() - -# save .adoc -save_file_table = "250-private-sector-table.adoc" - -# save and more cleanup -sink(file = save_file_table, type = "output") -tt %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -# totals table -save_file_totals = "260-private-sector-totals.adoc" - -sink(file = save_file_totals, type = "output") -tt %>% -summarise( -Datasets = sum(Datasets), -`Occurrence records` = sum(`Occurrence records`), -`Data citations` = sum(`Data citations`) -) %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -} \ No newline at end of file diff --git a/.history/build-table-script/R/table_20240704113137.R b/.history/build-table-script/R/table_20240704113137.R deleted file mode 100644 index 6c43d08..0000000 --- a/.history/build-table-script/R/table_20240704113137.R +++ /dev/null @@ -1,99 +0,0 @@ - -library(dplyr) -library(purrr) -library(gbifmt) # my library -# setwd("C:/Users/ftw712/Desktop/doc-private-sector-data-publishing/") - -# harvest publishers and datasets private sector publishers directly from machineTags - -# dataset -ds_mt = get_mt("privateSector.gbif.org",type="dataset",limit=500) |> -mutate(link = paste0("https://www.gbif.org/dataset/",uuid)) %>% -mutate(pd = "dataset") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -# publisher -pb_mt = get_mt("privateSector.gbif.org",type="organization",limit=500) |> -mutate(link = paste0("https://www.gbif.org/publisher/",uuid)) %>% -mutate(pd = "publisher") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -ss = list(ds_mt,pb_mt) |> -bind_rows() |> -glimpse() - -# ss = readr::read_csv("build-table-script/data/source.tsv") %>% -# mutate(pd = ifelse(grepl("publisher",link),"publisher","dataset")) %>% -# mutate(key = gsub("https://www.gbif.org/publisher/","",link)) %>% -# mutate(key = gsub("https://www.gbif.org/dataset/","",key)) %>% -# glimpse() - -gbif_country = rgbif::enumeration_country() %>% select(Country=title,iso2) %>% glimpse() - -pp = ss %>% -dplyr::filter(pd == "publisher") %>% -select(key,`Activity sector`) |> -mutate(name = map_chr(key,~rgbif::organizations(uuid=.x,limit=1)$data$title)) |> -mutate(`Occurrence records` = map_dbl(key,~ rgbif::occ_search(publishingOrg = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = map_dbl(key,~rgbif::dataset_search(publishingOrg= .x,limit=0)$meta$count)) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(publishingOrg = .x))) %>% -mutate(Company = paste0("https://www.gbif.org/publisher/",key,"[",name,"]")) %>% -mutate(iso2 = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -glimpse() - -72e23311-b65a-46d0-bc07-ff0a251b47e1 - -dd = ss %>% -dplyr::filter(pd == "dataset") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::dataset_get(uuid=.x)$data$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ -rgbif::occ_search(datasetKey = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = 1) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(datasetKey = .x))) %>% -mutate(Company = paste0("(https://www.gbif.org/dataset/",key,")[",name,"]")) %>% -mutate(p_key = map_chr(key,~ rgbif::dataset_get(uuid=.x)$data$publishingOrganizationKey)) %>% -mutate(iso2 = map_chr(p_key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -select(-p_key) %>% -glimpse() - -if(FALSE) { -# combine tables and clean up -tt = rbind(pp,dd) %>% -arrange(name) %>% -select(Company, `Activity sector`, Country, Datasets, `Occurrence records`, `Data citations`) %>% -glimpse() - -# save .adoc -save_file_table = "250-private-sector-table.adoc" - -# save and more cleanup -sink(file = save_file_table, type = "output") -tt %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -# totals table -save_file_totals = "260-private-sector-totals.adoc" - -sink(file = save_file_totals, type = "output") -tt %>% -summarise( -Datasets = sum(Datasets), -`Occurrence records` = sum(`Occurrence records`), -`Data citations` = sum(`Data citations`) -) %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -} \ No newline at end of file diff --git a/.history/build-table-script/R/table_20240704113144.R b/.history/build-table-script/R/table_20240704113144.R deleted file mode 100644 index 655115a..0000000 --- a/.history/build-table-script/R/table_20240704113144.R +++ /dev/null @@ -1,99 +0,0 @@ - -library(dplyr) -library(purrr) -library(gbifmt) # my library -# setwd("C:/Users/ftw712/Desktop/doc-private-sector-data-publishing/") - -# harvest publishers and datasets private sector publishers directly from machineTags - -# dataset -ds_mt = get_mt("privateSector.gbif.org",type="dataset",limit=500) |> -mutate(link = paste0("https://www.gbif.org/dataset/",uuid)) %>% -mutate(pd = "dataset") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -# publisher -pb_mt = get_mt("privateSector.gbif.org",type="organization",limit=500) |> -mutate(link = paste0("https://www.gbif.org/publisher/",uuid)) %>% -mutate(pd = "publisher") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -ss = list(ds_mt,pb_mt) |> -bind_rows() |> -glimpse() - -# ss = readr::read_csv("build-table-script/data/source.tsv") %>% -# mutate(pd = ifelse(grepl("publisher",link),"publisher","dataset")) %>% -# mutate(key = gsub("https://www.gbif.org/publisher/","",link)) %>% -# mutate(key = gsub("https://www.gbif.org/dataset/","",key)) %>% -# glimpse() - -gbif_country = rgbif::enumeration_country() %>% select(Country=title,iso2) %>% glimpse() - -pp = ss %>% -dplyr::filter(pd == "publisher") %>% -select(key,`Activity sector`) |> -mutate(name = map_chr(key,~rgbif::organizations(uuid=.x,limit=1)$data$title)) |> -mutate(`Occurrence records` = map_dbl(key,~ rgbif::occ_search(publishingOrg = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = map_dbl(key,~rgbif::dataset_search(publishingOrg= .x,limit=0)$meta$count)) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(publishingOrg = .x))) %>% -mutate(Company = paste0("https://www.gbif.org/publisher/",key,"[",name,"]")) %>% -mutate(iso2 = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -glimpse() - -72e23311-b65a-46d0-bc07-ff0a251b47e1 - -if(FALSE) { -dd = ss %>% -dplyr::filter(pd == "dataset") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::dataset_get(uuid=.x)$data$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ -rgbif::occ_search(datasetKey = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = 1) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(datasetKey = .x))) %>% -mutate(Company = paste0("(https://www.gbif.org/dataset/",key,")[",name,"]")) %>% -mutate(p_key = map_chr(key,~ rgbif::dataset_get(uuid=.x)$data$publishingOrganizationKey)) %>% -mutate(iso2 = map_chr(p_key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -select(-p_key) %>% -glimpse() - -# combine tables and clean up -tt = rbind(pp,dd) %>% -arrange(name) %>% -select(Company, `Activity sector`, Country, Datasets, `Occurrence records`, `Data citations`) %>% -glimpse() - -# save .adoc -save_file_table = "250-private-sector-table.adoc" - -# save and more cleanup -sink(file = save_file_table, type = "output") -tt %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -# totals table -save_file_totals = "260-private-sector-totals.adoc" - -sink(file = save_file_totals, type = "output") -tt %>% -summarise( -Datasets = sum(Datasets), -`Occurrence records` = sum(`Occurrence records`), -`Data citations` = sum(`Data citations`) -) %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -} \ No newline at end of file diff --git a/.history/build-table-script/R/table_20240704113150.R b/.history/build-table-script/R/table_20240704113150.R deleted file mode 100644 index 8a3aa88..0000000 --- a/.history/build-table-script/R/table_20240704113150.R +++ /dev/null @@ -1,99 +0,0 @@ - -library(dplyr) -library(purrr) -library(gbifmt) # my library -# setwd("C:/Users/ftw712/Desktop/doc-private-sector-data-publishing/") - -# harvest publishers and datasets private sector publishers directly from machineTags - -# dataset -ds_mt = get_mt("privateSector.gbif.org",type="dataset",limit=500) |> -mutate(link = paste0("https://www.gbif.org/dataset/",uuid)) %>% -mutate(pd = "dataset") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -# publisher -pb_mt = get_mt("privateSector.gbif.org",type="organization",limit=500) |> -mutate(link = paste0("https://www.gbif.org/publisher/",uuid)) %>% -mutate(pd = "publisher") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -ss = list(ds_mt,pb_mt) |> -bind_rows() |> -glimpse() - -# ss = readr::read_csv("build-table-script/data/source.tsv") %>% -# mutate(pd = ifelse(grepl("publisher",link),"publisher","dataset")) %>% -# mutate(key = gsub("https://www.gbif.org/publisher/","",link)) %>% -# mutate(key = gsub("https://www.gbif.org/dataset/","",key)) %>% -# glimpse() - -gbif_country = rgbif::enumeration_country() %>% select(Country=title,iso2) %>% glimpse() - -pp = ss %>% -dplyr::filter(pd == "publisher") %>% -select(key,`Activity sector`) |> -mutate(name = map_chr(key,~rgbif::organizations(uuid=.x,limit=1)$data$title)) |> -mutate(`Occurrence records` = map_dbl(key,~ rgbif::occ_search(publishingOrg = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = map_dbl(key,~rgbif::dataset_search(publishingOrg= .x,limit=0)$meta$count)) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(publishingOrg = .x))) %>% -mutate(Company = paste0("https://www.gbif.org/publisher/",key,"[",name,"]")) %>% -mutate(iso2 = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -glimpse() - -"72e23311-b65a-46d0-bc07-ff0a251b47e1" - -if(FALSE) { -dd = ss %>% -dplyr::filter(pd == "dataset") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::dataset_get(uuid=.x)$data$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ -rgbif::occ_search(datasetKey = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = 1) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(datasetKey = .x))) %>% -mutate(Company = paste0("(https://www.gbif.org/dataset/",key,")[",name,"]")) %>% -mutate(p_key = map_chr(key,~ rgbif::dataset_get(uuid=.x)$data$publishingOrganizationKey)) %>% -mutate(iso2 = map_chr(p_key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -select(-p_key) %>% -glimpse() - -# combine tables and clean up -tt = rbind(pp,dd) %>% -arrange(name) %>% -select(Company, `Activity sector`, Country, Datasets, `Occurrence records`, `Data citations`) %>% -glimpse() - -# save .adoc -save_file_table = "250-private-sector-table.adoc" - -# save and more cleanup -sink(file = save_file_table, type = "output") -tt %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -# totals table -save_file_totals = "260-private-sector-totals.adoc" - -sink(file = save_file_totals, type = "output") -tt %>% -summarise( -Datasets = sum(Datasets), -`Occurrence records` = sum(`Occurrence records`), -`Data citations` = sum(`Data citations`) -) %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -} \ No newline at end of file diff --git a/.history/build-table-script/R/table_20240704113202.R b/.history/build-table-script/R/table_20240704113202.R deleted file mode 100644 index fda6199..0000000 --- a/.history/build-table-script/R/table_20240704113202.R +++ /dev/null @@ -1,99 +0,0 @@ - -library(dplyr) -library(purrr) -library(gbifmt) # my library -# setwd("C:/Users/ftw712/Desktop/doc-private-sector-data-publishing/") - -# harvest publishers and datasets private sector publishers directly from machineTags - -# dataset -ds_mt = get_mt("privateSector.gbif.org",type="dataset",limit=500) |> -mutate(link = paste0("https://www.gbif.org/dataset/",uuid)) %>% -mutate(pd = "dataset") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -# publisher -pb_mt = get_mt("privateSector.gbif.org",type="organization",limit=500) |> -mutate(link = paste0("https://www.gbif.org/publisher/",uuid)) %>% -mutate(pd = "publisher") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -ss = list(ds_mt,pb_mt) |> -bind_rows() |> -glimpse() - -# ss = readr::read_csv("build-table-script/data/source.tsv") %>% -# mutate(pd = ifelse(grepl("publisher",link),"publisher","dataset")) %>% -# mutate(key = gsub("https://www.gbif.org/publisher/","",link)) %>% -# mutate(key = gsub("https://www.gbif.org/dataset/","",key)) %>% -# glimpse() - -gbif_country = rgbif::enumeration_country() %>% select(Country=title,iso2) %>% glimpse() - -pp = ss %>% -dplyr::filter(pd == "publisher") %>% -select(key,`Activity sector`) |> -mutate(name = map_chr(key,~rgbif::organizations(uuid=.x,limit=1)$data$title)) |> -mutate(`Occurrence records` = map_dbl(key,~ rgbif::occ_search(publishingOrg = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = map_dbl(key,~rgbif::dataset_search(publishingOrg= .x,limit=0)$meta$count)) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(publishingOrg = .x))) %>% -mutate(Company = paste0("https://www.gbif.org/publisher/",key,"[",name,"]")) %>% -mutate(iso2 = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -glimpse() - -rgbif::dataset_get(uuid="72e23311-b65a-46d0-bc07-ff0a251b47e1") - -if(FALSE) { -dd = ss %>% -dplyr::filter(pd == "dataset") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::dataset_get(uuid=.x)$data$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ -rgbif::occ_search(datasetKey = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = 1) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(datasetKey = .x))) %>% -mutate(Company = paste0("(https://www.gbif.org/dataset/",key,")[",name,"]")) %>% -mutate(p_key = map_chr(key,~ rgbif::dataset_get(uuid=.x)$data$publishingOrganizationKey)) %>% -mutate(iso2 = map_chr(p_key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -select(-p_key) %>% -glimpse() - -# combine tables and clean up -tt = rbind(pp,dd) %>% -arrange(name) %>% -select(Company, `Activity sector`, Country, Datasets, `Occurrence records`, `Data citations`) %>% -glimpse() - -# save .adoc -save_file_table = "250-private-sector-table.adoc" - -# save and more cleanup -sink(file = save_file_table, type = "output") -tt %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -# totals table -save_file_totals = "260-private-sector-totals.adoc" - -sink(file = save_file_totals, type = "output") -tt %>% -summarise( -Datasets = sum(Datasets), -`Occurrence records` = sum(`Occurrence records`), -`Data citations` = sum(`Data citations`) -) %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -} \ No newline at end of file diff --git a/.history/build-table-script/R/table_20240704113250.R b/.history/build-table-script/R/table_20240704113250.R deleted file mode 100644 index b8d1831..0000000 --- a/.history/build-table-script/R/table_20240704113250.R +++ /dev/null @@ -1,99 +0,0 @@ - -library(dplyr) -library(purrr) -library(gbifmt) # my library -# setwd("C:/Users/ftw712/Desktop/doc-private-sector-data-publishing/") - -# harvest publishers and datasets private sector publishers directly from machineTags - -# dataset -ds_mt = get_mt("privateSector.gbif.org",type="dataset",limit=500) |> -mutate(link = paste0("https://www.gbif.org/dataset/",uuid)) %>% -mutate(pd = "dataset") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -# publisher -pb_mt = get_mt("privateSector.gbif.org",type="organization",limit=500) |> -mutate(link = paste0("https://www.gbif.org/publisher/",uuid)) %>% -mutate(pd = "publisher") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -ss = list(ds_mt,pb_mt) |> -bind_rows() |> -glimpse() - -# ss = readr::read_csv("build-table-script/data/source.tsv") %>% -# mutate(pd = ifelse(grepl("publisher",link),"publisher","dataset")) %>% -# mutate(key = gsub("https://www.gbif.org/publisher/","",link)) %>% -# mutate(key = gsub("https://www.gbif.org/dataset/","",key)) %>% -# glimpse() - -gbif_country = rgbif::enumeration_country() %>% select(Country=title,iso2) %>% glimpse() - -pp = ss %>% -dplyr::filter(pd == "publisher") %>% -select(key,`Activity sector`) |> -mutate(name = map_chr(key,~rgbif::organizations(uuid=.x,limit=1)$data$title)) |> -mutate(`Occurrence records` = map_dbl(key,~ rgbif::occ_search(publishingOrg = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = map_dbl(key,~rgbif::dataset_search(publishingOrg= .x,limit=0)$meta$count)) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(publishingOrg = .x))) %>% -mutate(Company = paste0("https://www.gbif.org/publisher/",key,"[",name,"]")) %>% -mutate(iso2 = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -glimpse() - -rgbif::dataset_get(uuid="72e23311-b65a-46d0-bc07-ff0a251b47e1")$title - -if(FALSE) { -dd = ss %>% -dplyr::filter(pd == "dataset") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::dataset_get(uuid=.x)$data$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ -rgbif::occ_search(datasetKey = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = 1) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(datasetKey = .x))) %>% -mutate(Company = paste0("(https://www.gbif.org/dataset/",key,")[",name,"]")) %>% -mutate(p_key = map_chr(key,~ rgbif::dataset_get(uuid=.x)$data$publishingOrganizationKey)) %>% -mutate(iso2 = map_chr(p_key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -select(-p_key) %>% -glimpse() - -# combine tables and clean up -tt = rbind(pp,dd) %>% -arrange(name) %>% -select(Company, `Activity sector`, Country, Datasets, `Occurrence records`, `Data citations`) %>% -glimpse() - -# save .adoc -save_file_table = "250-private-sector-table.adoc" - -# save and more cleanup -sink(file = save_file_table, type = "output") -tt %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -# totals table -save_file_totals = "260-private-sector-totals.adoc" - -sink(file = save_file_totals, type = "output") -tt %>% -summarise( -Datasets = sum(Datasets), -`Occurrence records` = sum(`Occurrence records`), -`Data citations` = sum(`Data citations`) -) %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -} \ No newline at end of file diff --git a/.history/build-table-script/R/table_20240704113255.R b/.history/build-table-script/R/table_20240704113255.R deleted file mode 100644 index c3e8fd8..0000000 --- a/.history/build-table-script/R/table_20240704113255.R +++ /dev/null @@ -1,99 +0,0 @@ - -library(dplyr) -library(purrr) -library(gbifmt) # my library -# setwd("C:/Users/ftw712/Desktop/doc-private-sector-data-publishing/") - -# harvest publishers and datasets private sector publishers directly from machineTags - -# dataset -ds_mt = get_mt("privateSector.gbif.org",type="dataset",limit=500) |> -mutate(link = paste0("https://www.gbif.org/dataset/",uuid)) %>% -mutate(pd = "dataset") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -# publisher -pb_mt = get_mt("privateSector.gbif.org",type="organization",limit=500) |> -mutate(link = paste0("https://www.gbif.org/publisher/",uuid)) %>% -mutate(pd = "publisher") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -ss = list(ds_mt,pb_mt) |> -bind_rows() |> -glimpse() - -# ss = readr::read_csv("build-table-script/data/source.tsv") %>% -# mutate(pd = ifelse(grepl("publisher",link),"publisher","dataset")) %>% -# mutate(key = gsub("https://www.gbif.org/publisher/","",link)) %>% -# mutate(key = gsub("https://www.gbif.org/dataset/","",key)) %>% -# glimpse() - -gbif_country = rgbif::enumeration_country() %>% select(Country=title,iso2) %>% glimpse() - -pp = ss %>% -dplyr::filter(pd == "publisher") %>% -select(key,`Activity sector`) |> -mutate(name = map_chr(key,~rgbif::organizations(uuid=.x,limit=1)$data$title)) |> -mutate(`Occurrence records` = map_dbl(key,~ rgbif::occ_search(publishingOrg = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = map_dbl(key,~rgbif::dataset_search(publishingOrg= .x,limit=0)$meta$count)) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(publishingOrg = .x))) %>% -mutate(Company = paste0("https://www.gbif.org/publisher/",key,"[",name,"]")) %>% -mutate(iso2 = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -glimpse() - -rgbif::dataset_get(uuid="72e23311-b65a-46d0-bc07-ff0a251b47e1")$title - -if(FALSE) { -dd = ss %>% -dplyr::filter(pd == "dataset") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::dataset_get(uuid=.x)$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ -rgbif::occ_search(datasetKey = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = 1) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(datasetKey = .x))) %>% -mutate(Company = paste0("(https://www.gbif.org/dataset/",key,")[",name,"]")) %>% -mutate(p_key = map_chr(key,~ rgbif::dataset_get(uuid=.x)$data$publishingOrganizationKey)) %>% -mutate(iso2 = map_chr(p_key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -select(-p_key) %>% -glimpse() - -# combine tables and clean up -tt = rbind(pp,dd) %>% -arrange(name) %>% -select(Company, `Activity sector`, Country, Datasets, `Occurrence records`, `Data citations`) %>% -glimpse() - -# save .adoc -save_file_table = "250-private-sector-table.adoc" - -# save and more cleanup -sink(file = save_file_table, type = "output") -tt %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -# totals table -save_file_totals = "260-private-sector-totals.adoc" - -sink(file = save_file_totals, type = "output") -tt %>% -summarise( -Datasets = sum(Datasets), -`Occurrence records` = sum(`Occurrence records`), -`Data citations` = sum(`Data citations`) -) %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -} \ No newline at end of file diff --git a/.history/build-table-script/R/table_20240704113311.R b/.history/build-table-script/R/table_20240704113311.R deleted file mode 100644 index 536af50..0000000 --- a/.history/build-table-script/R/table_20240704113311.R +++ /dev/null @@ -1,99 +0,0 @@ - -library(dplyr) -library(purrr) -library(gbifmt) # my library -# setwd("C:/Users/ftw712/Desktop/doc-private-sector-data-publishing/") - -# harvest publishers and datasets private sector publishers directly from machineTags - -# dataset -ds_mt = get_mt("privateSector.gbif.org",type="dataset",limit=500) |> -mutate(link = paste0("https://www.gbif.org/dataset/",uuid)) %>% -mutate(pd = "dataset") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -# publisher -pb_mt = get_mt("privateSector.gbif.org",type="organization",limit=500) |> -mutate(link = paste0("https://www.gbif.org/publisher/",uuid)) %>% -mutate(pd = "publisher") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -ss = list(ds_mt,pb_mt) |> -bind_rows() |> -glimpse() - -# ss = readr::read_csv("build-table-script/data/source.tsv") %>% -# mutate(pd = ifelse(grepl("publisher",link),"publisher","dataset")) %>% -# mutate(key = gsub("https://www.gbif.org/publisher/","",link)) %>% -# mutate(key = gsub("https://www.gbif.org/dataset/","",key)) %>% -# glimpse() - -gbif_country = rgbif::enumeration_country() %>% select(Country=title,iso2) %>% glimpse() - -pp = ss %>% -dplyr::filter(pd == "publisher") %>% -select(key,`Activity sector`) |> -mutate(name = map_chr(key,~rgbif::organizations(uuid=.x,limit=1)$data$title)) |> -mutate(`Occurrence records` = map_dbl(key,~ rgbif::occ_search(publishingOrg = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = map_dbl(key,~rgbif::dataset_search(publishingOrg= .x,limit=0)$meta$count)) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(publishingOrg = .x))) %>% -mutate(Company = paste0("https://www.gbif.org/publisher/",key,"[",name,"]")) %>% -mutate(iso2 = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -glimpse() - -rgbif::dataset_get(uuid="72e23311-b65a-46d0-bc07-ff0a251b47e1")$title - -if(FALSE) { -dd = ss %>% -dplyr::filter(pd == "dataset") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::dataset_get(uuid=.x)$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ -rgbif::occ_search(datasetKey = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = 1) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(datasetKey = .x))) %>% -mutate(Company = paste0("(https://www.gbif.org/dataset/",key,")[",name,"]")) %>% -mutate(p_key = map_chr(key,~ rgbif::dataset_get(uuid=.x)$publishingOrganizationKey)) %>% -mutate(iso2 = map_chr(p_key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -select(-p_key) %>% -glimpse() - -# combine tables and clean up -tt = rbind(pp,dd) %>% -arrange(name) %>% -select(Company, `Activity sector`, Country, Datasets, `Occurrence records`, `Data citations`) %>% -glimpse() - -# save .adoc -save_file_table = "250-private-sector-table.adoc" - -# save and more cleanup -sink(file = save_file_table, type = "output") -tt %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -# totals table -save_file_totals = "260-private-sector-totals.adoc" - -sink(file = save_file_totals, type = "output") -tt %>% -summarise( -Datasets = sum(Datasets), -`Occurrence records` = sum(`Occurrence records`), -`Data citations` = sum(`Data citations`) -) %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -} \ No newline at end of file diff --git a/.history/build-table-script/R/table_20240704113316.R b/.history/build-table-script/R/table_20240704113316.R deleted file mode 100644 index df07ed5..0000000 --- a/.history/build-table-script/R/table_20240704113316.R +++ /dev/null @@ -1,99 +0,0 @@ - -library(dplyr) -library(purrr) -library(gbifmt) # my library -# setwd("C:/Users/ftw712/Desktop/doc-private-sector-data-publishing/") - -# harvest publishers and datasets private sector publishers directly from machineTags - -# dataset -ds_mt = get_mt("privateSector.gbif.org",type="dataset",limit=500) |> -mutate(link = paste0("https://www.gbif.org/dataset/",uuid)) %>% -mutate(pd = "dataset") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -# publisher -pb_mt = get_mt("privateSector.gbif.org",type="organization",limit=500) |> -mutate(link = paste0("https://www.gbif.org/publisher/",uuid)) %>% -mutate(pd = "publisher") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -ss = list(ds_mt,pb_mt) |> -bind_rows() |> -glimpse() - -# ss = readr::read_csv("build-table-script/data/source.tsv") %>% -# mutate(pd = ifelse(grepl("publisher",link),"publisher","dataset")) %>% -# mutate(key = gsub("https://www.gbif.org/publisher/","",link)) %>% -# mutate(key = gsub("https://www.gbif.org/dataset/","",key)) %>% -# glimpse() - -gbif_country = rgbif::enumeration_country() %>% select(Country=title,iso2) %>% glimpse() - -pp = ss %>% -dplyr::filter(pd == "publisher") %>% -select(key,`Activity sector`) |> -mutate(name = map_chr(key,~rgbif::organizations(uuid=.x,limit=1)$data$title)) |> -mutate(`Occurrence records` = map_dbl(key,~ rgbif::occ_search(publishingOrg = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = map_dbl(key,~rgbif::dataset_search(publishingOrg= .x,limit=0)$meta$count)) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(publishingOrg = .x))) %>% -mutate(Company = paste0("https://www.gbif.org/publisher/",key,"[",name,"]")) %>% -mutate(iso2 = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -glimpse() - -rgbif::dataset_get(uuid="72e23311-b65a-46d0-bc07-ff0a251b47e1")$title - -dd = ss %>% -dplyr::filter(pd == "dataset") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::dataset_get(uuid=.x)$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ -rgbif::occ_search(datasetKey = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = 1) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(datasetKey = .x))) %>% -mutate(Company = paste0("(https://www.gbif.org/dataset/",key,")[",name,"]")) %>% -mutate(p_key = map_chr(key,~ rgbif::dataset_get(uuid=.x)$publishingOrganizationKey)) %>% -mutate(iso2 = map_chr(p_key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -select(-p_key) %>% -glimpse() - -if(FALSE) { -# combine tables and clean up -tt = rbind(pp,dd) %>% -arrange(name) %>% -select(Company, `Activity sector`, Country, Datasets, `Occurrence records`, `Data citations`) %>% -glimpse() - -# save .adoc -save_file_table = "250-private-sector-table.adoc" - -# save and more cleanup -sink(file = save_file_table, type = "output") -tt %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -# totals table -save_file_totals = "260-private-sector-totals.adoc" - -sink(file = save_file_totals, type = "output") -tt %>% -summarise( -Datasets = sum(Datasets), -`Occurrence records` = sum(`Occurrence records`), -`Data citations` = sum(`Data citations`) -) %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -} \ No newline at end of file diff --git a/.history/build-table-script/R/table_20240704113434.R b/.history/build-table-script/R/table_20240704113434.R deleted file mode 100644 index 07998cf..0000000 --- a/.history/build-table-script/R/table_20240704113434.R +++ /dev/null @@ -1,100 +0,0 @@ - -library(dplyr) -library(purrr) -library(gbifmt) # my library -# setwd("C:/Users/ftw712/Desktop/doc-private-sector-data-publishing/") - -# harvest publishers and datasets private sector publishers directly from machineTags - -# dataset -ds_mt = get_mt("privateSector.gbif.org",type="dataset",limit=500) |> -mutate(link = paste0("https://www.gbif.org/dataset/",uuid)) %>% -mutate(pd = "dataset") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -# publisher -pb_mt = get_mt("privateSector.gbif.org",type="organization",limit=500) |> -mutate(link = paste0("https://www.gbif.org/publisher/",uuid)) %>% -mutate(pd = "publisher") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -ss = list(ds_mt,pb_mt) |> -bind_rows() |> -glimpse() - -# don't need to read from source anymore since we read from machine tags -# ss = readr::read_csv("build-table-script/data/source.tsv") %>% -# mutate(pd = ifelse(grepl("publisher",link),"publisher","dataset")) %>% -# mutate(key = gsub("https://www.gbif.org/publisher/","",link)) %>% -# mutate(key = gsub("https://www.gbif.org/dataset/","",key)) %>% -# glimpse() - -gbif_country = rgbif::enumeration_country() %>% select(Country=title,iso2) %>% glimpse() - -pp = ss %>% -dplyr::filter(pd == "publisher") %>% -select(key,`Activity sector`) |> -mutate(name = map_chr(key,~rgbif::organizations(uuid=.x,limit=1)$data$title)) |> -mutate(`Occurrence records` = map_dbl(key,~ rgbif::occ_search(publishingOrg = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = map_dbl(key,~rgbif::dataset_search(publishingOrg= .x,limit=0)$meta$count)) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(publishingOrg = .x))) %>% -mutate(Company = paste0("https://www.gbif.org/publisher/",key,"[",name,"]")) %>% -mutate(iso2 = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -glimpse() - -rgbif::dataset_get(uuid="72e23311-b65a-46d0-bc07-ff0a251b47e1")$title - -dd = ss %>% -dplyr::filter(pd == "dataset") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::dataset_get(uuid=.x)$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ -rgbif::occ_search(datasetKey = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = 1) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(datasetKey = .x))) %>% -mutate(Company = paste0("(https://www.gbif.org/dataset/",key,")[",name,"]")) %>% -mutate(p_key = map_chr(key,~ rgbif::dataset_get(uuid=.x)$publishingOrganizationKey)) %>% -mutate(iso2 = map_chr(p_key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -select(-p_key) %>% -glimpse() - -# combine tables and clean up -tt = rbind(pp,dd) %>% -arrange(name) %>% -select(Company, `Activity sector`, Country, Datasets, `Occurrence records`, `Data citations`) %>% -glimpse() - -# save .adoc -save_file_table = "250-private-sector-table.adoc" - -# save and more cleanup -sink(file = save_file_table, type = "output") -tt %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -# totals table -save_file_totals = "260-private-sector-totals.adoc" - -sink(file = save_file_totals, type = "output") -tt %>% -summarise( -Datasets = sum(Datasets), -`Occurrence records` = sum(`Occurrence records`), -`Data citations` = sum(`Data citations`) -) %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -if(FALSE) { -} \ No newline at end of file diff --git a/.history/build-table-script/R/table_20240704113512.R b/.history/build-table-script/R/table_20240704113512.R deleted file mode 100644 index 3c050f1..0000000 --- a/.history/build-table-script/R/table_20240704113512.R +++ /dev/null @@ -1,97 +0,0 @@ - -library(dplyr) -library(purrr) -library(gbifmt) # my library -# setwd("C:/Users/ftw712/Desktop/doc-private-sector-data-publishing/") - -# harvest publishers and datasets private sector publishers directly from machineTags - -# dataset -ds_mt = get_mt("privateSector.gbif.org",type="dataset",limit=500) |> -mutate(link = paste0("https://www.gbif.org/dataset/",uuid)) %>% -mutate(pd = "dataset") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -# publisher -pb_mt = get_mt("privateSector.gbif.org",type="organization",limit=500) |> -mutate(link = paste0("https://www.gbif.org/publisher/",uuid)) %>% -mutate(pd = "publisher") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -ss = list(ds_mt,pb_mt) |> -bind_rows() |> -glimpse() - -# don't need to read from source anymore since we read from machine tags -# ss = readr::read_csv("build-table-script/data/source.tsv") %>% -# mutate(pd = ifelse(grepl("publisher",link),"publisher","dataset")) %>% -# mutate(key = gsub("https://www.gbif.org/publisher/","",link)) %>% -# mutate(key = gsub("https://www.gbif.org/dataset/","",key)) %>% -# glimpse() - -gbif_country = rgbif::enumeration_country() %>% select(Country=title,iso2) %>% glimpse() - -pp = ss %>% -dplyr::filter(pd == "publisher") %>% -select(key,`Activity sector`) |> -mutate(name = map_chr(key,~rgbif::organizations(uuid=.x,limit=1)$data$title)) |> -mutate(`Occurrence records` = map_dbl(key,~ rgbif::occ_search(publishingOrg = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = map_dbl(key,~rgbif::dataset_search(publishingOrg= .x,limit=0)$meta$count)) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(publishingOrg = .x))) %>% -mutate(Company = paste0("https://www.gbif.org/publisher/",key,"[",name,"]")) %>% -mutate(iso2 = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -glimpse() - -rgbif::dataset_get(uuid="72e23311-b65a-46d0-bc07-ff0a251b47e1")$title - -dd = ss %>% -dplyr::filter(pd == "dataset") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::dataset_get(uuid=.x)$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ -rgbif::occ_search(datasetKey = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = 1) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(datasetKey = .x))) %>% -mutate(Company = paste0("(https://www.gbif.org/dataset/",key,")[",name,"]")) %>% -mutate(p_key = map_chr(key,~ rgbif::dataset_get(uuid=.x)$publishingOrganizationKey)) %>% -mutate(iso2 = map_chr(p_key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -select(-p_key) %>% -glimpse() - -# combine tables and clean up -tt = rbind(pp,dd) %>% -arrange(name) %>% -select(Company, `Activity sector`, Country, Datasets, `Occurrence records`, `Data citations`) %>% -glimpse() - -# save .adoc -save_file_table = "250-private-sector-table.adoc" - -# save and more cleanup -sink(file = save_file_table, type = "output") -tt %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -# totals table -save_file_totals = "260-private-sector-totals.adoc" - -sink(file = save_file_totals, type = "output") -tt %>% -summarise( -Datasets = sum(Datasets), -`Occurrence records` = sum(`Occurrence records`), -`Data citations` = sum(`Data citations`) -) %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() diff --git a/.history/build-table-script/R/table_20240704114121.R b/.history/build-table-script/R/table_20240704114121.R deleted file mode 100644 index e7d7f7d..0000000 --- a/.history/build-table-script/R/table_20240704114121.R +++ /dev/null @@ -1,99 +0,0 @@ - -library(dplyr) -library(purrr) -library(gbifmt) # my library -# setwd("C:/Users/ftw712/Desktop/doc-private-sector-data-publishing/") - -# harvest publishers and datasets private sector publishers directly from machineTags - -# dataset -ds_mt = get_mt("privateSector.gbif.org",type="dataset",limit=500) |> -mutate(link = paste0("https://www.gbif.org/dataset/",uuid)) %>% -mutate(pd = "dataset") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -# publisher -pb_mt = get_mt("privateSector.gbif.org",type="organization",limit=500) |> -mutate(link = paste0("https://www.gbif.org/publisher/",uuid)) %>% -mutate(pd = "publisher") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -ss = list(ds_mt,pb_mt) |> -bind_rows() |> -glimpse() - -# don't need to read from source anymore since we read from machine tags -# ss = readr::read_csv("build-table-script/data/source.tsv") %>% -# mutate(pd = ifelse(grepl("publisher",link),"publisher","dataset")) %>% -# mutate(key = gsub("https://www.gbif.org/publisher/","",link)) %>% -# mutate(key = gsub("https://www.gbif.org/dataset/","",key)) %>% -# glimpse() - -gbif_country = rgbif::enumeration_country() %>% select(Country=title,iso2) %>% glimpse() - -pp = ss %>% -dplyr::filter(pd == "publisher") %>% -select(key,`Activity sector`) |> -mutate(name = map_chr(key,~rgbif::organizations(uuid=.x,limit=1)$data$title)) |> -mutate(`Occurrence records` = map_dbl(key,~ rgbif::occ_search(publishingOrg = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = map_dbl(key,~rgbif::dataset_search(publishingOrg= .x,limit=0)$meta$count)) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(publishingOrg = .x))) %>% -mutate(Company = paste0("https://www.gbif.org/publisher/",key,"[",name,"]")) %>% -mutate(iso2 = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -glimpse() - -rgbif::dataset_get(uuid="72e23311-b65a-46d0-bc07-ff0a251b47e1")$title - -dd = ss %>% -dplyr::filter(pd == "dataset") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::dataset_get(uuid=.x)$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ -rgbif::occ_search(datasetKey = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = 1) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(datasetKey = .x))) %>% -mutate(Company = paste0("(https://www.gbif.org/dataset/",key,")[",name,"]")) %>% -mutate(p_key = map_chr(key,~ rgbif::dataset_get(uuid=.x)$publishingOrganizationKey)) %>% -mutate(iso2 = map_chr(p_key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -select(-p_key) %>% -glimpse() - -# combine tables and clean up -tt = rbind(pp,dd) %>% -arrange(name) %>% -select(Company, `Activity sector`, Country, Datasets, `Occurrence records`, `Data citations`) %>% -glimpse() - - - -# save .adoc -save_file_table = "250-private-sector-table.adoc" - -# save and more cleanup -sink(file = save_file_table, type = "output") -tt %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() - -# totals table -save_file_totals = "260-private-sector-totals.adoc" - -sink(file = save_file_totals, type = "output") -tt %>% -summarise( -Datasets = sum(Datasets), -`Occurrence records` = sum(`Occurrence records`), -`Data citations` = sum(`Data citations`) -) %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) -sink() diff --git a/.history/build-table-script/R/table_20240704114454.R b/.history/build-table-script/R/table_20240704114454.R deleted file mode 100644 index caab04f..0000000 --- a/.history/build-table-script/R/table_20240704114454.R +++ /dev/null @@ -1,99 +0,0 @@ - -library(dplyr) -library(purrr) -library(gbifmt) # my library -# setwd("C:/Users/ftw712/Desktop/doc-private-sector-data-publishing/") - -# harvest publishers and datasets private sector publishers directly from machineTags - -# dataset -ds_mt = get_mt("privateSector.gbif.org",type="dataset",limit=500) |> -mutate(link = paste0("https://www.gbif.org/dataset/",uuid)) %>% -mutate(pd = "dataset") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -# publisher -pb_mt = get_mt("privateSector.gbif.org",type="organization",limit=500) |> -mutate(link = paste0("https://www.gbif.org/publisher/",uuid)) %>% -mutate(pd = "publisher") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -ss = list(ds_mt,pb_mt) |> -bind_rows() |> -glimpse() - -# don't need to read from source anymore since we read from machine tags -# ss = readr::read_csv("build-table-script/data/source.tsv") %>% -# mutate(pd = ifelse(grepl("publisher",link),"publisher","dataset")) %>% -# mutate(key = gsub("https://www.gbif.org/publisher/","",link)) %>% -# mutate(key = gsub("https://www.gbif.org/dataset/","",key)) %>% -# glimpse() - -gbif_country = rgbif::enumeration_country() %>% select(Country=title,iso2) %>% glimpse() - -pp = ss %>% -dplyr::filter(pd == "publisher") %>% -select(key,`Activity sector`) |> -mutate(name = map_chr(key,~rgbif::organizations(uuid=.x,limit=1)$data$title)) |> -mutate(`Occurrence records` = map_dbl(key,~ rgbif::occ_search(publishingOrg = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = map_dbl(key,~rgbif::dataset_search(publishingOrg= .x,limit=0)$meta$count)) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(publishingOrg = .x))) %>% -mutate(Company = paste0("https://www.gbif.org/publisher/",key,"[",name,"]")) %>% -mutate(iso2 = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -glimpse() - -rgbif::dataset_get(uuid="72e23311-b65a-46d0-bc07-ff0a251b47e1")$title - -dd = ss %>% -dplyr::filter(pd == "dataset") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::dataset_get(uuid=.x)$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ -rgbif::occ_search(datasetKey = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = 1) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(datasetKey = .x))) %>% -mutate(Company = paste0("(https://www.gbif.org/dataset/",key,")[",name,"]")) %>% -mutate(p_key = map_chr(key,~ rgbif::dataset_get(uuid=.x)$publishingOrganizationKey)) %>% -mutate(iso2 = map_chr(p_key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -select(-p_key) %>% -glimpse() - -# combine tables and clean up -tt = rbind(pp,dd) %>% -arrange(name) %>% -select(Company, `Activity sector`, Country, Datasets, `Occurrence records`, `Data citations`) %>% -glimpse() - - - -# save .adoc -# save_file_table = "250-private-sector-table.adoc" - -# save and more cleanup -# sink(file = save_file_table, type = "output") -# tt %>% -# mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -# mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -# mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -# ascii::ascii(include.rownames = FALSE, digits = 0) -# sink() - -# totals table -# save_file_totals = "260-private-sector-totals.adoc" - -# sink(file = save_file_totals, type = "output") -# tt %>% -# summarise( -# Datasets = sum(Datasets), -# `Occurrence records` = sum(`Occurrence records`), -# `Data citations` = sum(`Data citations`) -# ) %>% -# mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -# mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -# mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -# ascii::ascii(include.rownames = FALSE, digits = 0) -# sink() diff --git a/.history/build-table-script/R/table_20240704114458.R b/.history/build-table-script/R/table_20240704114458.R deleted file mode 100644 index b188e54..0000000 --- a/.history/build-table-script/R/table_20240704114458.R +++ /dev/null @@ -1,99 +0,0 @@ - -library(dplyr) -library(purrr) -library(gbifmt) # my library -# setwd("C:/Users/ftw712/Desktop/doc-private-sector-data-publishing/") - -# harvest publishers and datasets private sector publishers directly from machineTags - -# dataset -ds_mt = get_mt("privateSector.gbif.org",type="dataset",limit=500) |> -mutate(link = paste0("https://www.gbif.org/dataset/",uuid)) %>% -mutate(pd = "dataset") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -# publisher -pb_mt = get_mt("privateSector.gbif.org",type="organization",limit=500) |> -mutate(link = paste0("https://www.gbif.org/publisher/",uuid)) %>% -mutate(pd = "publisher") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -ss = list(ds_mt,pb_mt) |> -bind_rows() |> -glimpse() - -# don't need to read from source anymore since we read from machine tags -# ss = readr::read_csv("build-table-script/data/source.tsv") %>% -# mutate(pd = ifelse(grepl("publisher",link),"publisher","dataset")) %>% -# mutate(key = gsub("https://www.gbif.org/publisher/","",link)) %>% -# mutate(key = gsub("https://www.gbif.org/dataset/","",key)) %>% -# glimpse() - -gbif_country = rgbif::enumeration_country() %>% select(Country=title,iso2) %>% glimpse() - -pp = ss %>% -dplyr::filter(pd == "publisher") %>% -select(key,`Activity sector`) |> -mutate(name = map_chr(key,~rgbif::organizations(uuid=.x,limit=1)$data$title)) |> -mutate(`Occurrence records` = map_dbl(key,~ rgbif::occ_search(publishingOrg = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = map_dbl(key,~rgbif::dataset_search(publishingOrg= .x,limit=0)$meta$count)) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(publishingOrg = .x))) %>% -mutate(Company = paste0("https://www.gbif.org/publisher/",key,"[",name,"]")) %>% -mutate(iso2 = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -glimpse() - -rgbif::dataset_get(uuid="72e23311-b65a-46d0-bc07-ff0a251b47e1")$title - -dd = ss %>% -dplyr::filter(pd == "dataset") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::dataset_get(uuid=.x)$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ -rgbif::occ_search(datasetKey = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = 1) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(datasetKey = .x))) %>% -mutate(Company = paste0("(https://www.gbif.org/dataset/",key,")[",name,"]")) %>% -mutate(p_key = map_chr(key,~ rgbif::dataset_get(uuid=.x)$publishingOrganizationKey)) %>% -mutate(iso2 = map_chr(p_key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -select(-p_key) %>% -glimpse() - -# combine tables and clean up -tt = rbind(pp,dd) %>% -arrange(name) %>% -select(Company, `Activity sector`, Country, Datasets, `Occurrence records`, `Data citations`) %>% -glimpse() - -# save as csv - -# save .adoc -# save_file_table = "250-private-sector-table.adoc" - -# save and more cleanup -# sink(file = save_file_table, type = "output") -# tt %>% -# mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -# mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -# mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -# ascii::ascii(include.rownames = FALSE, digits = 0) -# sink() - -# totals table -# save_file_totals = "260-private-sector-totals.adoc" - -# sink(file = save_file_totals, type = "output") -# tt %>% -# summarise( -# Datasets = sum(Datasets), -# `Occurrence records` = sum(`Occurrence records`), -# `Data citations` = sum(`Data citations`) -# ) %>% -# mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -# mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -# mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -# ascii::ascii(include.rownames = FALSE, digits = 0) -# sink() diff --git a/.history/build-table-script/R/table_20240704114512.R b/.history/build-table-script/R/table_20240704114512.R deleted file mode 100644 index 5060f1b..0000000 --- a/.history/build-table-script/R/table_20240704114512.R +++ /dev/null @@ -1,101 +0,0 @@ - -library(dplyr) -library(purrr) -library(gbifmt) # my library -# setwd("C:/Users/ftw712/Desktop/doc-private-sector-data-publishing/") - -# harvest publishers and datasets private sector publishers directly from machineTags - -# dataset -ds_mt = get_mt("privateSector.gbif.org",type="dataset",limit=500) |> -mutate(link = paste0("https://www.gbif.org/dataset/",uuid)) %>% -mutate(pd = "dataset") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -# publisher -pb_mt = get_mt("privateSector.gbif.org",type="organization",limit=500) |> -mutate(link = paste0("https://www.gbif.org/publisher/",uuid)) %>% -mutate(pd = "publisher") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -ss = list(ds_mt,pb_mt) |> -bind_rows() |> -glimpse() - -# don't need to read from source anymore since we read from machine tags -# ss = readr::read_csv("build-table-script/data/source.tsv") %>% -# mutate(pd = ifelse(grepl("publisher",link),"publisher","dataset")) %>% -# mutate(key = gsub("https://www.gbif.org/publisher/","",link)) %>% -# mutate(key = gsub("https://www.gbif.org/dataset/","",key)) %>% -# glimpse() - -gbif_country = rgbif::enumeration_country() %>% select(Country=title,iso2) %>% glimpse() - -pp = ss %>% -dplyr::filter(pd == "publisher") %>% -select(key,`Activity sector`) |> -mutate(name = map_chr(key,~rgbif::organizations(uuid=.x,limit=1)$data$title)) |> -mutate(`Occurrence records` = map_dbl(key,~ rgbif::occ_search(publishingOrg = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = map_dbl(key,~rgbif::dataset_search(publishingOrg= .x,limit=0)$meta$count)) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(publishingOrg = .x))) %>% -mutate(Company = paste0("https://www.gbif.org/publisher/",key,"[",name,"]")) %>% -mutate(iso2 = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -glimpse() - -rgbif::dataset_get(uuid="72e23311-b65a-46d0-bc07-ff0a251b47e1")$title - -dd = ss %>% -dplyr::filter(pd == "dataset") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::dataset_get(uuid=.x)$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ -rgbif::occ_search(datasetKey = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = 1) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(datasetKey = .x))) %>% -mutate(Company = paste0("(https://www.gbif.org/dataset/",key,")[",name,"]")) %>% -mutate(p_key = map_chr(key,~ rgbif::dataset_get(uuid=.x)$publishingOrganizationKey)) %>% -mutate(iso2 = map_chr(p_key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -select(-p_key) %>% -glimpse() - -# combine tables and clean up -tt = rbind(pp,dd) %>% -arrange(name) %>% -select(Company, `Activity sector`, Country, Datasets, `Occurrence records`, `Data citations`) %>% -glimpse() - -# save as csv - - - -# save .adoc -# save_file_table = "250-private-sector-table.adoc" - -# save and more cleanup -# sink(file = save_file_table, type = "output") -# tt %>% -# mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -# mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -# mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -# ascii::ascii(include.rownames = FALSE, digits = 0) -# sink() - -# totals table -# save_file_totals = "260-private-sector-totals.adoc" - -# sink(file = save_file_totals, type = "output") -# tt %>% -# summarise( -# Datasets = sum(Datasets), -# `Occurrence records` = sum(`Occurrence records`), -# `Data citations` = sum(`Data citations`) -# ) %>% -# mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -# mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -# mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -# ascii::ascii(include.rownames = FALSE, digits = 0) -# sink() diff --git a/.history/build-table-script/R/table_20240704114648.R b/.history/build-table-script/R/table_20240704114648.R deleted file mode 100644 index 30bdde6..0000000 --- a/.history/build-table-script/R/table_20240704114648.R +++ /dev/null @@ -1,103 +0,0 @@ - -library(dplyr) -library(purrr) -library(gbifmt) # my library -# setwd("C:/Users/ftw712/Desktop/doc-private-sector-data-publishing/") - -# harvest publishers and datasets private sector publishers directly from machineTags - -# dataset -ds_mt = get_mt("privateSector.gbif.org",type="dataset",limit=500) |> -mutate(link = paste0("https://www.gbif.org/dataset/",uuid)) %>% -mutate(pd = "dataset") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -# publisher -pb_mt = get_mt("privateSector.gbif.org",type="organization",limit=500) |> -mutate(link = paste0("https://www.gbif.org/publisher/",uuid)) %>% -mutate(pd = "publisher") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -ss = list(ds_mt,pb_mt) |> -bind_rows() |> -glimpse() - -# don't need to read from source anymore since we read from machine tags -# ss = readr::read_csv("build-table-script/data/source.tsv") %>% -# mutate(pd = ifelse(grepl("publisher",link),"publisher","dataset")) %>% -# mutate(key = gsub("https://www.gbif.org/publisher/","",link)) %>% -# mutate(key = gsub("https://www.gbif.org/dataset/","",key)) %>% -# glimpse() - -gbif_country = rgbif::enumeration_country() %>% select(Country=title,iso2) %>% glimpse() - -pp = ss %>% -dplyr::filter(pd == "publisher") %>% -select(key,`Activity sector`) |> -mutate(name = map_chr(key,~rgbif::organizations(uuid=.x,limit=1)$data$title)) |> -mutate(`Occurrence records` = map_dbl(key,~ rgbif::occ_search(publishingOrg = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = map_dbl(key,~rgbif::dataset_search(publishingOrg= .x,limit=0)$meta$count)) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(publishingOrg = .x))) %>% -mutate(Company = paste0("https://www.gbif.org/publisher/",key,"[",name,"]")) %>% -mutate(iso2 = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -glimpse() - -rgbif::dataset_get(uuid="72e23311-b65a-46d0-bc07-ff0a251b47e1")$title - -dd = ss %>% -dplyr::filter(pd == "dataset") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::dataset_get(uuid=.x)$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ -rgbif::occ_search(datasetKey = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = 1) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(datasetKey = .x))) %>% -mutate(Company = paste0("(https://www.gbif.org/dataset/",key,")[",name,"]")) %>% -mutate(p_key = map_chr(key,~ rgbif::dataset_get(uuid=.x)$publishingOrganizationKey)) %>% -mutate(iso2 = map_chr(p_key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -select(-p_key) %>% -glimpse() - -# combine tables and clean up -tt = rbind(pp,dd) %>% -arrange(name) %>% -select(Company, `Activity sector`, Country, Datasets, `Occurrence records`, `Data citations`) %>% -glimpse() - -"https://www.gbif.org/publisher/f2429cd1-4d45-475c-852a-892024cb4aba[ARC - Arctic Research and Consulting DA]",{CONSULTING},{NO},1,8 914,63 - -# save as csv - - - -# save .adoc -# save_file_table = "250-private-sector-table.adoc" - -# save and more cleanup -# sink(file = save_file_table, type = "output") -# tt %>% -# mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -# mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -# mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -# ascii::ascii(include.rownames = FALSE, digits = 0) -# sink() - -# totals table -# save_file_totals = "260-private-sector-totals.adoc" - -# sink(file = save_file_totals, type = "output") -# tt %>% -# summarise( -# Datasets = sum(Datasets), -# `Occurrence records` = sum(`Occurrence records`), -# `Data citations` = sum(`Data citations`) -# ) %>% -# mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -# mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -# mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -# ascii::ascii(include.rownames = FALSE, digits = 0) -# sink() diff --git a/.history/build-table-script/R/table_20240704115542.R b/.history/build-table-script/R/table_20240704115542.R deleted file mode 100644 index 6635bfc..0000000 --- a/.history/build-table-script/R/table_20240704115542.R +++ /dev/null @@ -1,108 +0,0 @@ - -library(dplyr) -library(purrr) -library(gbifmt) # my library -# setwd("C:/Users/ftw712/Desktop/doc-private-sector-data-publishing/") - -# harvest publishers and datasets private sector publishers directly from machineTags - -# dataset -ds_mt = get_mt("privateSector.gbif.org",type="dataset",limit=500) |> -mutate(link = paste0("https://www.gbif.org/dataset/",uuid)) %>% -mutate(pd = "dataset") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -# publisher -pb_mt = get_mt("privateSector.gbif.org",type="organization",limit=500) |> -mutate(link = paste0("https://www.gbif.org/publisher/",uuid)) %>% -mutate(pd = "publisher") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -ss = list(ds_mt,pb_mt) |> -bind_rows() |> -glimpse() - -# don't need to read from source anymore since we read from machine tags -# ss = readr::read_csv("build-table-script/data/source.tsv") %>% -# mutate(pd = ifelse(grepl("publisher",link),"publisher","dataset")) %>% -# mutate(key = gsub("https://www.gbif.org/publisher/","",link)) %>% -# mutate(key = gsub("https://www.gbif.org/dataset/","",key)) %>% -# glimpse() - -gbif_country = rgbif::enumeration_country() %>% select(Country=title,iso2) %>% glimpse() - -pp = ss %>% -dplyr::filter(pd == "publisher") %>% -select(key,`Activity sector`) |> -mutate(name = map_chr(key,~rgbif::organizations(uuid=.x,limit=1)$data$title)) |> -mutate(`Occurrence records` = map_dbl(key,~ rgbif::occ_search(publishingOrg = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = map_dbl(key,~rgbif::dataset_search(publishingOrg= .x,limit=0)$meta$count)) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(publishingOrg = .x))) %>% -mutate(Company = paste0("https://www.gbif.org/publisher/",key,"[",name,"]")) %>% -mutate(iso2 = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -glimpse() - -rgbif::dataset_get(uuid="72e23311-b65a-46d0-bc07-ff0a251b47e1")$title - -dd = ss %>% -dplyr::filter(pd == "dataset") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::dataset_get(uuid=.x)$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ -rgbif::occ_search(datasetKey = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = 1) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(datasetKey = .x))) %>% -mutate(Company = paste0("(https://www.gbif.org/dataset/",key,")[",name,"]")) %>% -mutate(p_key = map_chr(key,~ rgbif::dataset_get(uuid=.x)$publishingOrganizationKey)) %>% -mutate(iso2 = map_chr(p_key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -select(-p_key) %>% -glimpse() - -# combine tables and clean up -tt = rbind(pp,dd) %>% -arrange(name) %>% -select(Company, `Activity sector`, Country, Datasets, `Occurrence records`, `Data citations`) %>% -glimpse() - -"https://www.gbif.org/publisher/f2429cd1-4d45-475c-852a-892024cb4aba[ARC - Arctic Research and Consulting DA]",{CONSULTING},{NO},1,8 914,63 - -# save as csv - -# tt %>% -# mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -# mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -# mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -# ascii::ascii(include.rownames = FALSE, digits = 0) - - -# save .adoc -# save_file_table = "250-private-sector-table.adoc" - -# save and more cleanup -# sink(file = save_file_table, type = "output") -# tt %>% -# mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -# mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -# mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -# ascii::ascii(include.rownames = FALSE, digits = 0) -# sink() - -# totals table -# save_file_totals = "260-private-sector-totals.adoc" - -# sink(file = save_file_totals, type = "output") -# tt %>% -# summarise( -# Datasets = sum(Datasets), -# `Occurrence records` = sum(`Occurrence records`), -# `Data citations` = sum(`Data citations`) -# ) %>% -# mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -# mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -# mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -# ascii::ascii(include.rownames = FALSE, digits = 0) -# sink() diff --git a/.history/build-table-script/R/table_20240704115545.R b/.history/build-table-script/R/table_20240704115545.R deleted file mode 100644 index 6c07b46..0000000 --- a/.history/build-table-script/R/table_20240704115545.R +++ /dev/null @@ -1,108 +0,0 @@ - -library(dplyr) -library(purrr) -library(gbifmt) # my library -# setwd("C:/Users/ftw712/Desktop/doc-private-sector-data-publishing/") - -# harvest publishers and datasets private sector publishers directly from machineTags - -# dataset -ds_mt = get_mt("privateSector.gbif.org",type="dataset",limit=500) |> -mutate(link = paste0("https://www.gbif.org/dataset/",uuid)) %>% -mutate(pd = "dataset") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -# publisher -pb_mt = get_mt("privateSector.gbif.org",type="organization",limit=500) |> -mutate(link = paste0("https://www.gbif.org/publisher/",uuid)) %>% -mutate(pd = "publisher") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -ss = list(ds_mt,pb_mt) |> -bind_rows() |> -glimpse() - -# don't need to read from source anymore since we read from machine tags -# ss = readr::read_csv("build-table-script/data/source.tsv") %>% -# mutate(pd = ifelse(grepl("publisher",link),"publisher","dataset")) %>% -# mutate(key = gsub("https://www.gbif.org/publisher/","",link)) %>% -# mutate(key = gsub("https://www.gbif.org/dataset/","",key)) %>% -# glimpse() - -gbif_country = rgbif::enumeration_country() %>% select(Country=title,iso2) %>% glimpse() - -pp = ss %>% -dplyr::filter(pd == "publisher") %>% -select(key,`Activity sector`) |> -mutate(name = map_chr(key,~rgbif::organizations(uuid=.x,limit=1)$data$title)) |> -mutate(`Occurrence records` = map_dbl(key,~ rgbif::occ_search(publishingOrg = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = map_dbl(key,~rgbif::dataset_search(publishingOrg= .x,limit=0)$meta$count)) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(publishingOrg = .x))) %>% -mutate(Company = paste0("https://www.gbif.org/publisher/",key,"[",name,"]")) %>% -mutate(iso2 = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -glimpse() - -rgbif::dataset_get(uuid="72e23311-b65a-46d0-bc07-ff0a251b47e1")$title - -dd = ss %>% -dplyr::filter(pd == "dataset") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::dataset_get(uuid=.x)$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ -rgbif::occ_search(datasetKey = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = 1) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(datasetKey = .x))) %>% -mutate(Company = paste0("(https://www.gbif.org/dataset/",key,")[",name,"]")) %>% -mutate(p_key = map_chr(key,~ rgbif::dataset_get(uuid=.x)$publishingOrganizationKey)) %>% -mutate(iso2 = map_chr(p_key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -select(-p_key) %>% -glimpse() - -# combine tables and clean up -tt = rbind(pp,dd) %>% -arrange(name) %>% -select(Company, `Activity sector`, Country, Datasets, `Occurrence records`, `Data citations`) %>% -glimpse() - -"https://www.gbif.org/publisher/f2429cd1-4d45-475c-852a-892024cb4aba[ARC - Arctic Research and Consulting DA]",{CONSULTING},{NO},1,8 914,63 - -# save as csv - -tt %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) - - -# save .adoc -# save_file_table = "250-private-sector-table.adoc" - -# save and more cleanup -# sink(file = save_file_table, type = "output") -# tt %>% -# mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -# mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -# mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -# ascii::ascii(include.rownames = FALSE, digits = 0) -# sink() - -# totals table -# save_file_totals = "260-private-sector-totals.adoc" - -# sink(file = save_file_totals, type = "output") -# tt %>% -# summarise( -# Datasets = sum(Datasets), -# `Occurrence records` = sum(`Occurrence records`), -# `Data citations` = sum(`Data citations`) -# ) %>% -# mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -# mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -# mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -# ascii::ascii(include.rownames = FALSE, digits = 0) -# sink() diff --git a/.history/build-table-script/R/table_20240704120053.R b/.history/build-table-script/R/table_20240704120053.R deleted file mode 100644 index 951171a..0000000 --- a/.history/build-table-script/R/table_20240704120053.R +++ /dev/null @@ -1,108 +0,0 @@ - -library(dplyr) -library(purrr) -library(gbifmt) # my library -# setwd("C:/Users/ftw712/Desktop/doc-private-sector-data-publishing/") - -# harvest publishers and datasets private sector publishers directly from machineTags - -# dataset -ds_mt = get_mt("privateSector.gbif.org",type="dataset",limit=500) |> -mutate(link = paste0("https://www.gbif.org/dataset/",uuid)) %>% -mutate(pd = "dataset") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -# publisher -pb_mt = get_mt("privateSector.gbif.org",type="organization",limit=500) |> -mutate(link = paste0("https://www.gbif.org/publisher/",uuid)) %>% -mutate(pd = "publisher") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -ss = list(ds_mt,pb_mt) |> -bind_rows() |> -glimpse() - -# don't need to read from source anymore since we read from machine tags -# ss = readr::read_csv("build-table-script/data/source.tsv") %>% -# mutate(pd = ifelse(grepl("publisher",link),"publisher","dataset")) %>% -# mutate(key = gsub("https://www.gbif.org/publisher/","",link)) %>% -# mutate(key = gsub("https://www.gbif.org/dataset/","",key)) %>% -# glimpse() - -gbif_country = rgbif::enumeration_country() %>% select(Country=title,iso2) %>% glimpse() - -pp = ss %>% -dplyr::filter(pd == "publisher") %>% -select(key,`Activity sector`) |> -mutate(name = map_chr(key,~rgbif::organizations(uuid=.x,limit=1)$data$title)) |> -mutate(`Occurrence records` = map_dbl(key,~ rgbif::occ_search(publishingOrg = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = map_dbl(key,~rgbif::dataset_search(publishingOrg= .x,limit=0)$meta$count)) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(publishingOrg = .x))) %>% -mutate(Company = paste0("https://www.gbif.org/publisher/",key,"[",name,"]")) %>% -mutate(iso2 = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -glimpse() - -rgbif::dataset_get(uuid="72e23311-b65a-46d0-bc07-ff0a251b47e1")$title - -dd = ss %>% -dplyr::filter(pd == "dataset") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::dataset_get(uuid=.x)$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ -rgbif::occ_search(datasetKey = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = 1) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(datasetKey = .x))) %>% -mutate(Company = paste0("(https://www.gbif.org/dataset/",key,")[",name,"]")) %>% -mutate(p_key = map_chr(key,~ rgbif::dataset_get(uuid=.x)$publishingOrganizationKey)) %>% -mutate(iso2 = map_chr(p_key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -select(-p_key) %>% -glimpse() - -# combine tables and clean up -tt = rbind(pp,dd) %>% -arrange(name) %>% -select(Company, `Activity sector`, Country, Datasets, `Occurrence records`, `Data citations`) %>% -glimpse() - -# "https://www.gbif.org/publisher/f2429cd1-4d45-475c-852a-892024cb4aba[ARC - Arctic Research and Consulting DA]",{CONSULTING},{NO},1,8 914,63 - -# save as csv - -tt %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -ascii::ascii(include.rownames = FALSE, digits = 0) - - -# save .adoc -# save_file_table = "250-private-sector-table.adoc" - -# save and more cleanup -# sink(file = save_file_table, type = "output") -# tt %>% -# mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -# mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -# mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -# ascii::ascii(include.rownames = FALSE, digits = 0) -# sink() - -# totals table -# save_file_totals = "260-private-sector-totals.adoc" - -# sink(file = save_file_totals, type = "output") -# tt %>% -# summarise( -# Datasets = sum(Datasets), -# `Occurrence records` = sum(`Occurrence records`), -# `Data citations` = sum(`Data citations`) -# ) %>% -# mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -# mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -# mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -# ascii::ascii(include.rownames = FALSE, digits = 0) -# sink() diff --git a/.history/build-table-script/R/table_20240704120141.R b/.history/build-table-script/R/table_20240704120141.R deleted file mode 100644 index 4c4378a..0000000 --- a/.history/build-table-script/R/table_20240704120141.R +++ /dev/null @@ -1,106 +0,0 @@ - -library(dplyr) -library(purrr) -library(gbifmt) # my library -# setwd("C:/Users/ftw712/Desktop/doc-private-sector-data-publishing/") - -# harvest publishers and datasets private sector publishers directly from machineTags - -# dataset -ds_mt = get_mt("privateSector.gbif.org",type="dataset",limit=500) |> -mutate(link = paste0("https://www.gbif.org/dataset/",uuid)) %>% -mutate(pd = "dataset") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -# publisher -pb_mt = get_mt("privateSector.gbif.org",type="organization",limit=500) |> -mutate(link = paste0("https://www.gbif.org/publisher/",uuid)) %>% -mutate(pd = "publisher") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -ss = list(ds_mt,pb_mt) |> -bind_rows() |> -glimpse() - -# don't need to read from source anymore since we read from machine tags -# ss = readr::read_csv("build-table-script/data/source.tsv") %>% -# mutate(pd = ifelse(grepl("publisher",link),"publisher","dataset")) %>% -# mutate(key = gsub("https://www.gbif.org/publisher/","",link)) %>% -# mutate(key = gsub("https://www.gbif.org/dataset/","",key)) %>% -# glimpse() - -gbif_country = rgbif::enumeration_country() %>% select(Country=title,iso2) %>% glimpse() - -pp = ss %>% -dplyr::filter(pd == "publisher") %>% -select(key,`Activity sector`) |> -mutate(name = map_chr(key,~rgbif::organizations(uuid=.x,limit=1)$data$title)) |> -mutate(`Occurrence records` = map_dbl(key,~ rgbif::occ_search(publishingOrg = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = map_dbl(key,~rgbif::dataset_search(publishingOrg= .x,limit=0)$meta$count)) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(publishingOrg = .x))) %>% -mutate(Company = paste0("https://www.gbif.org/publisher/",key,"[",name,"]")) %>% -mutate(iso2 = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -glimpse() - -rgbif::dataset_get(uuid="72e23311-b65a-46d0-bc07-ff0a251b47e1")$title - -dd = ss %>% -dplyr::filter(pd == "dataset") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::dataset_get(uuid=.x)$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ -rgbif::occ_search(datasetKey = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = 1) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(datasetKey = .x))) %>% -mutate(Company = paste0("(https://www.gbif.org/dataset/",key,")[",name,"]")) %>% -mutate(p_key = map_chr(key,~ rgbif::dataset_get(uuid=.x)$publishingOrganizationKey)) %>% -mutate(iso2 = map_chr(p_key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -select(-p_key) %>% -glimpse() - -# combine tables and clean up -tt = rbind(pp,dd) %>% -arrange(name) %>% -select(Company, `Activity sector`, Country, Datasets, `Occurrence records`, `Data citations`) %>% -glimpse() - -# "https://www.gbif.org/publisher/f2429cd1-4d45-475c-852a-892024cb4aba[ARC - Arctic Research and Consulting DA]",{CONSULTING},{NO},1,8 914,63 - -# save as csv - -tt %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) - -# save .adoc -# save_file_table = "250-private-sector-table.adoc" - -# save and more cleanup -# sink(file = save_file_table, type = "output") -# tt %>% -# mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -# mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -# mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -# ascii::ascii(include.rownames = FALSE, digits = 0) -# sink() - -# totals table -# save_file_totals = "260-private-sector-totals.adoc" - -# sink(file = save_file_totals, type = "output") -# tt %>% -# summarise( -# Datasets = sum(Datasets), -# `Occurrence records` = sum(`Occurrence records`), -# `Data citations` = sum(`Data citations`) -# ) %>% -# mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -# mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -# mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -# ascii::ascii(include.rownames = FALSE, digits = 0) -# sink() diff --git a/.history/build-table-script/R/table_20240704120227.R b/.history/build-table-script/R/table_20240704120227.R deleted file mode 100644 index 039c652..0000000 --- a/.history/build-table-script/R/table_20240704120227.R +++ /dev/null @@ -1,106 +0,0 @@ - -library(dplyr) -library(purrr) -library(gbifmt) # my library -# setwd("C:/Users/ftw712/Desktop/doc-private-sector-data-publishing/") - -# harvest publishers and datasets private sector publishers directly from machineTags - -# dataset -ds_mt = get_mt("privateSector.gbif.org",type="dataset",limit=500) |> -mutate(link = paste0("https://www.gbif.org/dataset/",uuid)) %>% -mutate(pd = "dataset") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -# publisher -pb_mt = get_mt("privateSector.gbif.org",type="organization",limit=500) |> -mutate(link = paste0("https://www.gbif.org/publisher/",uuid)) %>% -mutate(pd = "publisher") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -ss = list(ds_mt,pb_mt) |> -bind_rows() |> -glimpse() - -# don't need to read from source anymore since we read from machine tags -# ss = readr::read_csv("build-table-script/data/source.tsv") %>% -# mutate(pd = ifelse(grepl("publisher",link),"publisher","dataset")) %>% -# mutate(key = gsub("https://www.gbif.org/publisher/","",link)) %>% -# mutate(key = gsub("https://www.gbif.org/dataset/","",key)) %>% -# glimpse() - -gbif_country = rgbif::enumeration_country() %>% select(Country=title,iso2) %>% glimpse() - -pp = ss %>% -dplyr::filter(pd == "publisher") %>% -select(key,`Activity sector`) |> -mutate(name = map_chr(key,~rgbif::organizations(uuid=.x,limit=1)$data$title)) |> -mutate(`Occurrence records` = map_dbl(key,~ rgbif::occ_search(publishingOrg = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = map_dbl(key,~rgbif::dataset_search(publishingOrg= .x,limit=0)$meta$count)) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(publishingOrg = .x))) %>% -mutate(Company = paste0("https://www.gbif.org/publisher/",key,"[",name,"]")) %>% -mutate(iso2 = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -glimpse() - -rgbif::dataset_get(uuid="72e23311-b65a-46d0-bc07-ff0a251b47e1")$title - -dd = ss %>% -dplyr::filter(pd == "dataset") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::dataset_get(uuid=.x)$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ -rgbif::occ_search(datasetKey = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = 1) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(datasetKey = .x))) %>% -mutate(Company = paste0("(https://www.gbif.org/dataset/",key,")[",name,"]")) %>% -mutate(p_key = map_chr(key,~ rgbif::dataset_get(uuid=.x)$publishingOrganizationKey)) %>% -mutate(iso2 = map_chr(p_key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -select(-p_key) %>% -glimpse() - -# combine tables and clean up -tt = rbind(pp,dd) %>% -arrange(name) %>% -select(Company, `Activity sector`, Country, Datasets, `Occurrence records`, `Data citations`) %>% -glimpse() - -# "https://www.gbif.org/publisher/f2429cd1-4d45-475c-852a-892024cb4aba[ARC - Arctic Research and Consulting DA]",{CONSULTING},{NO},1,8 914,63 - -# save as csv - -tt %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark="U+202F")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) - -# save .adoc -# save_file_table = "250-private-sector-table.adoc" - -# save and more cleanup -# sink(file = save_file_table, type = "output") -# tt %>% -# mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -# mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -# mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -# ascii::ascii(include.rownames = FALSE, digits = 0) -# sink() - -# totals table -# save_file_totals = "260-private-sector-totals.adoc" - -# sink(file = save_file_totals, type = "output") -# tt %>% -# summarise( -# Datasets = sum(Datasets), -# `Occurrence records` = sum(`Occurrence records`), -# `Data citations` = sum(`Data citations`) -# ) %>% -# mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -# mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -# mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -# ascii::ascii(include.rownames = FALSE, digits = 0) -# sink() diff --git a/.history/build-table-script/R/table_20240704120233.R b/.history/build-table-script/R/table_20240704120233.R deleted file mode 100644 index 01e292c..0000000 --- a/.history/build-table-script/R/table_20240704120233.R +++ /dev/null @@ -1,106 +0,0 @@ - -library(dplyr) -library(purrr) -library(gbifmt) # my library -# setwd("C:/Users/ftw712/Desktop/doc-private-sector-data-publishing/") - -# harvest publishers and datasets private sector publishers directly from machineTags - -# dataset -ds_mt = get_mt("privateSector.gbif.org",type="dataset",limit=500) |> -mutate(link = paste0("https://www.gbif.org/dataset/",uuid)) %>% -mutate(pd = "dataset") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -# publisher -pb_mt = get_mt("privateSector.gbif.org",type="organization",limit=500) |> -mutate(link = paste0("https://www.gbif.org/publisher/",uuid)) %>% -mutate(pd = "publisher") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -ss = list(ds_mt,pb_mt) |> -bind_rows() |> -glimpse() - -# don't need to read from source anymore since we read from machine tags -# ss = readr::read_csv("build-table-script/data/source.tsv") %>% -# mutate(pd = ifelse(grepl("publisher",link),"publisher","dataset")) %>% -# mutate(key = gsub("https://www.gbif.org/publisher/","",link)) %>% -# mutate(key = gsub("https://www.gbif.org/dataset/","",key)) %>% -# glimpse() - -gbif_country = rgbif::enumeration_country() %>% select(Country=title,iso2) %>% glimpse() - -pp = ss %>% -dplyr::filter(pd == "publisher") %>% -select(key,`Activity sector`) |> -mutate(name = map_chr(key,~rgbif::organizations(uuid=.x,limit=1)$data$title)) |> -mutate(`Occurrence records` = map_dbl(key,~ rgbif::occ_search(publishingOrg = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = map_dbl(key,~rgbif::dataset_search(publishingOrg= .x,limit=0)$meta$count)) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(publishingOrg = .x))) %>% -mutate(Company = paste0("https://www.gbif.org/publisher/",key,"[",name,"]")) %>% -mutate(iso2 = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -glimpse() - -rgbif::dataset_get(uuid="72e23311-b65a-46d0-bc07-ff0a251b47e1")$title - -dd = ss %>% -dplyr::filter(pd == "dataset") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::dataset_get(uuid=.x)$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ -rgbif::occ_search(datasetKey = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = 1) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(datasetKey = .x))) %>% -mutate(Company = paste0("(https://www.gbif.org/dataset/",key,")[",name,"]")) %>% -mutate(p_key = map_chr(key,~ rgbif::dataset_get(uuid=.x)$publishingOrganizationKey)) %>% -mutate(iso2 = map_chr(p_key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -select(-p_key) %>% -glimpse() - -# combine tables and clean up -tt = rbind(pp,dd) %>% -arrange(name) %>% -select(Company, `Activity sector`, Country, Datasets, `Occurrence records`, `Data citations`) %>% -glimpse() - -# "https://www.gbif.org/publisher/f2429cd1-4d45-475c-852a-892024cb4aba[ARC - Arctic Research and Consulting DA]",{CONSULTING},{NO},1,8 914,63 - -# save as csv - -tt %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark="U+202F")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark="U+202F")) - -# save .adoc -# save_file_table = "250-private-sector-table.adoc" - -# save and more cleanup -# sink(file = save_file_table, type = "output") -# tt %>% -# mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -# mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -# mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -# ascii::ascii(include.rownames = FALSE, digits = 0) -# sink() - -# totals table -# save_file_totals = "260-private-sector-totals.adoc" - -# sink(file = save_file_totals, type = "output") -# tt %>% -# summarise( -# Datasets = sum(Datasets), -# `Occurrence records` = sum(`Occurrence records`), -# `Data citations` = sum(`Data citations`) -# ) %>% -# mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -# mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -# mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -# ascii::ascii(include.rownames = FALSE, digits = 0) -# sink() diff --git a/.history/build-table-script/R/table_20240704120236.R b/.history/build-table-script/R/table_20240704120236.R deleted file mode 100644 index 501c1c2..0000000 --- a/.history/build-table-script/R/table_20240704120236.R +++ /dev/null @@ -1,106 +0,0 @@ - -library(dplyr) -library(purrr) -library(gbifmt) # my library -# setwd("C:/Users/ftw712/Desktop/doc-private-sector-data-publishing/") - -# harvest publishers and datasets private sector publishers directly from machineTags - -# dataset -ds_mt = get_mt("privateSector.gbif.org",type="dataset",limit=500) |> -mutate(link = paste0("https://www.gbif.org/dataset/",uuid)) %>% -mutate(pd = "dataset") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -# publisher -pb_mt = get_mt("privateSector.gbif.org",type="organization",limit=500) |> -mutate(link = paste0("https://www.gbif.org/publisher/",uuid)) %>% -mutate(pd = "publisher") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -ss = list(ds_mt,pb_mt) |> -bind_rows() |> -glimpse() - -# don't need to read from source anymore since we read from machine tags -# ss = readr::read_csv("build-table-script/data/source.tsv") %>% -# mutate(pd = ifelse(grepl("publisher",link),"publisher","dataset")) %>% -# mutate(key = gsub("https://www.gbif.org/publisher/","",link)) %>% -# mutate(key = gsub("https://www.gbif.org/dataset/","",key)) %>% -# glimpse() - -gbif_country = rgbif::enumeration_country() %>% select(Country=title,iso2) %>% glimpse() - -pp = ss %>% -dplyr::filter(pd == "publisher") %>% -select(key,`Activity sector`) |> -mutate(name = map_chr(key,~rgbif::organizations(uuid=.x,limit=1)$data$title)) |> -mutate(`Occurrence records` = map_dbl(key,~ rgbif::occ_search(publishingOrg = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = map_dbl(key,~rgbif::dataset_search(publishingOrg= .x,limit=0)$meta$count)) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(publishingOrg = .x))) %>% -mutate(Company = paste0("https://www.gbif.org/publisher/",key,"[",name,"]")) %>% -mutate(iso2 = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -glimpse() - -rgbif::dataset_get(uuid="72e23311-b65a-46d0-bc07-ff0a251b47e1")$title - -dd = ss %>% -dplyr::filter(pd == "dataset") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::dataset_get(uuid=.x)$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ -rgbif::occ_search(datasetKey = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = 1) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(datasetKey = .x))) %>% -mutate(Company = paste0("(https://www.gbif.org/dataset/",key,")[",name,"]")) %>% -mutate(p_key = map_chr(key,~ rgbif::dataset_get(uuid=.x)$publishingOrganizationKey)) %>% -mutate(iso2 = map_chr(p_key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -select(-p_key) %>% -glimpse() - -# combine tables and clean up -tt = rbind(pp,dd) %>% -arrange(name) %>% -select(Company, `Activity sector`, Country, Datasets, `Occurrence records`, `Data citations`) %>% -glimpse() - -# "https://www.gbif.org/publisher/f2429cd1-4d45-475c-852a-892024cb4aba[ARC - Arctic Research and Consulting DA]",{CONSULTING},{NO},1,8 914,63 - -# save as csv - -tt %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark="U+202F")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark="U+202F")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark="U+202F")) - -# save .adoc -# save_file_table = "250-private-sector-table.adoc" - -# save and more cleanup -# sink(file = save_file_table, type = "output") -# tt %>% -# mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -# mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -# mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -# ascii::ascii(include.rownames = FALSE, digits = 0) -# sink() - -# totals table -# save_file_totals = "260-private-sector-totals.adoc" - -# sink(file = save_file_totals, type = "output") -# tt %>% -# summarise( -# Datasets = sum(Datasets), -# `Occurrence records` = sum(`Occurrence records`), -# `Data citations` = sum(`Data citations`) -# ) %>% -# mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -# mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -# mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -# ascii::ascii(include.rownames = FALSE, digits = 0) -# sink() diff --git a/.history/build-table-script/R/table_20240704120453.R b/.history/build-table-script/R/table_20240704120453.R deleted file mode 100644 index 583e04e..0000000 --- a/.history/build-table-script/R/table_20240704120453.R +++ /dev/null @@ -1,106 +0,0 @@ - -library(dplyr) -library(purrr) -library(gbifmt) # my library -# setwd("C:/Users/ftw712/Desktop/doc-private-sector-data-publishing/") - -# harvest publishers and datasets private sector publishers directly from machineTags - -# dataset -ds_mt = get_mt("privateSector.gbif.org",type="dataset",limit=500) |> -mutate(link = paste0("https://www.gbif.org/dataset/",uuid)) %>% -mutate(pd = "dataset") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -# publisher -pb_mt = get_mt("privateSector.gbif.org",type="organization",limit=500) |> -mutate(link = paste0("https://www.gbif.org/publisher/",uuid)) %>% -mutate(pd = "publisher") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -ss = list(ds_mt,pb_mt) |> -bind_rows() |> -glimpse() - -# don't need to read from source anymore since we read from machine tags -# ss = readr::read_csv("build-table-script/data/source.tsv") %>% -# mutate(pd = ifelse(grepl("publisher",link),"publisher","dataset")) %>% -# mutate(key = gsub("https://www.gbif.org/publisher/","",link)) %>% -# mutate(key = gsub("https://www.gbif.org/dataset/","",key)) %>% -# glimpse() - -gbif_country = rgbif::enumeration_country() %>% select(Country=title,iso2) %>% glimpse() - -pp = ss %>% -dplyr::filter(pd == "publisher") %>% -select(key,`Activity sector`) |> -mutate(name = map_chr(key,~rgbif::organizations(uuid=.x,limit=1)$data$title)) |> -mutate(`Occurrence records` = map_dbl(key,~ rgbif::occ_search(publishingOrg = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = map_dbl(key,~rgbif::dataset_search(publishingOrg= .x,limit=0)$meta$count)) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(publishingOrg = .x))) %>% -mutate(Company = paste0("https://www.gbif.org/publisher/",key,"[",name,"]")) %>% -mutate(iso2 = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -glimpse() - -rgbif::dataset_get(uuid="72e23311-b65a-46d0-bc07-ff0a251b47e1")$title - -dd = ss %>% -dplyr::filter(pd == "dataset") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::dataset_get(uuid=.x)$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ -rgbif::occ_search(datasetKey = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = 1) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(datasetKey = .x))) %>% -mutate(Company = paste0("(https://www.gbif.org/dataset/",key,")[",name,"]")) %>% -mutate(p_key = map_chr(key,~ rgbif::dataset_get(uuid=.x)$publishingOrganizationKey)) %>% -mutate(iso2 = map_chr(p_key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -select(-p_key) %>% -glimpse() - -# combine tables and clean up -tt = rbind(pp,dd) %>% -arrange(name) %>% -select(Company, `Activity sector`, Country, Datasets, `Occurrence records`, `Data citations`) %>% -glimpse() - -# "https://www.gbif.org/publisher/f2429cd1-4d45-475c-852a-892024cb4aba[ARC - Arctic Research and Consulting DA]",{CONSULTING},{NO},1,8 914,63 - -# save as csv - -tt %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark="\u202F")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark="\u202F")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark="\u202F")) - -# save .adoc -# save_file_table = "250-private-sector-table.adoc" - -# save and more cleanup -# sink(file = save_file_table, type = "output") -# tt %>% -# mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -# mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -# mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -# ascii::ascii(include.rownames = FALSE, digits = 0) -# sink() - -# totals table -# save_file_totals = "260-private-sector-totals.adoc" - -# sink(file = save_file_totals, type = "output") -# tt %>% -# summarise( -# Datasets = sum(Datasets), -# `Occurrence records` = sum(`Occurrence records`), -# `Data citations` = sum(`Data citations`) -# ) %>% -# mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -# mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -# mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -# ascii::ascii(include.rownames = FALSE, digits = 0) -# sink() diff --git a/.history/build-table-script/R/table_20240704120713.R b/.history/build-table-script/R/table_20240704120713.R deleted file mode 100644 index 10d9c76..0000000 --- a/.history/build-table-script/R/table_20240704120713.R +++ /dev/null @@ -1,106 +0,0 @@ - -library(dplyr) -library(purrr) -library(gbifmt) # my library -# setwd("C:/Users/ftw712/Desktop/doc-private-sector-data-publishing/") - -# harvest publishers and datasets private sector publishers directly from machineTags - -# dataset -ds_mt = get_mt("privateSector.gbif.org",type="dataset",limit=500) |> -mutate(link = paste0("https://www.gbif.org/dataset/",uuid)) %>% -mutate(pd = "dataset") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -# publisher -pb_mt = get_mt("privateSector.gbif.org",type="organization",limit=500) |> -mutate(link = paste0("https://www.gbif.org/publisher/",uuid)) %>% -mutate(pd = "publisher") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -ss = list(ds_mt,pb_mt) |> -bind_rows() |> -glimpse() - -# don't need to read from source anymore since we read from machine tags -# ss = readr::read_csv("build-table-script/data/source.tsv") %>% -# mutate(pd = ifelse(grepl("publisher",link),"publisher","dataset")) %>% -# mutate(key = gsub("https://www.gbif.org/publisher/","",link)) %>% -# mutate(key = gsub("https://www.gbif.org/dataset/","",key)) %>% -# glimpse() - -gbif_country = rgbif::enumeration_country() %>% select(Country=title,iso2) %>% glimpse() - -pp = ss %>% -dplyr::filter(pd == "publisher") %>% -select(key,`Activity sector`) |> -mutate(name = map_chr(key,~rgbif::organizations(uuid=.x,limit=1)$data$title)) |> -mutate(`Occurrence records` = map_dbl(key,~ rgbif::occ_search(publishingOrg = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = map_dbl(key,~rgbif::dataset_search(publishingOrg= .x,limit=0)$meta$count)) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(publishingOrg = .x))) %>% -mutate(Company = paste0("https://www.gbif.org/publisher/",key,"[",name,"]")) %>% -mutate(iso2 = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -glimpse() - -rgbif::dataset_get(uuid="72e23311-b65a-46d0-bc07-ff0a251b47e1")$title - -dd = ss %>% -dplyr::filter(pd == "dataset") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::dataset_get(uuid=.x)$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ -rgbif::occ_search(datasetKey = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = 1) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(datasetKey = .x))) %>% -mutate(Company = paste0("(https://www.gbif.org/dataset/",key,")[",name,"]")) %>% -mutate(p_key = map_chr(key,~ rgbif::dataset_get(uuid=.x)$publishingOrganizationKey)) %>% -mutate(iso2 = map_chr(p_key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -select(-p_key) %>% -glimpse() - -# combine tables and clean up -tt = rbind(pp,dd) %>% -arrange(name) %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark="\u202F")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark="\u202F")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark="\u202F")) %>% -select(Company, `Activity sector`, Country, Datasets, `Occurrence records`, `Data citations`) %>% -glimpse() - -# "https://www.gbif.org/publisher/f2429cd1-4d45-475c-852a-892024cb4aba[ARC - Arctic Research and Consulting DA]",{CONSULTING},{NO},1,8 914,63 - -# save as csv - -tt %>% - -# save .adoc -# save_file_table = "250-private-sector-table.adoc" - -# save and more cleanup -# sink(file = save_file_table, type = "output") -# tt %>% -# mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -# mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -# mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -# ascii::ascii(include.rownames = FALSE, digits = 0) -# sink() - -# totals table -# save_file_totals = "260-private-sector-totals.adoc" - -# sink(file = save_file_totals, type = "output") -# tt %>% -# summarise( -# Datasets = sum(Datasets), -# `Occurrence records` = sum(`Occurrence records`), -# `Data citations` = sum(`Data citations`) -# ) %>% -# mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -# mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -# mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -# ascii::ascii(include.rownames = FALSE, digits = 0) -# sink() diff --git a/.history/build-table-script/R/table_20240704120716.R b/.history/build-table-script/R/table_20240704120716.R deleted file mode 100644 index 10d9c76..0000000 --- a/.history/build-table-script/R/table_20240704120716.R +++ /dev/null @@ -1,106 +0,0 @@ - -library(dplyr) -library(purrr) -library(gbifmt) # my library -# setwd("C:/Users/ftw712/Desktop/doc-private-sector-data-publishing/") - -# harvest publishers and datasets private sector publishers directly from machineTags - -# dataset -ds_mt = get_mt("privateSector.gbif.org",type="dataset",limit=500) |> -mutate(link = paste0("https://www.gbif.org/dataset/",uuid)) %>% -mutate(pd = "dataset") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -# publisher -pb_mt = get_mt("privateSector.gbif.org",type="organization",limit=500) |> -mutate(link = paste0("https://www.gbif.org/publisher/",uuid)) %>% -mutate(pd = "publisher") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -ss = list(ds_mt,pb_mt) |> -bind_rows() |> -glimpse() - -# don't need to read from source anymore since we read from machine tags -# ss = readr::read_csv("build-table-script/data/source.tsv") %>% -# mutate(pd = ifelse(grepl("publisher",link),"publisher","dataset")) %>% -# mutate(key = gsub("https://www.gbif.org/publisher/","",link)) %>% -# mutate(key = gsub("https://www.gbif.org/dataset/","",key)) %>% -# glimpse() - -gbif_country = rgbif::enumeration_country() %>% select(Country=title,iso2) %>% glimpse() - -pp = ss %>% -dplyr::filter(pd == "publisher") %>% -select(key,`Activity sector`) |> -mutate(name = map_chr(key,~rgbif::organizations(uuid=.x,limit=1)$data$title)) |> -mutate(`Occurrence records` = map_dbl(key,~ rgbif::occ_search(publishingOrg = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = map_dbl(key,~rgbif::dataset_search(publishingOrg= .x,limit=0)$meta$count)) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(publishingOrg = .x))) %>% -mutate(Company = paste0("https://www.gbif.org/publisher/",key,"[",name,"]")) %>% -mutate(iso2 = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -glimpse() - -rgbif::dataset_get(uuid="72e23311-b65a-46d0-bc07-ff0a251b47e1")$title - -dd = ss %>% -dplyr::filter(pd == "dataset") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::dataset_get(uuid=.x)$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ -rgbif::occ_search(datasetKey = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = 1) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(datasetKey = .x))) %>% -mutate(Company = paste0("(https://www.gbif.org/dataset/",key,")[",name,"]")) %>% -mutate(p_key = map_chr(key,~ rgbif::dataset_get(uuid=.x)$publishingOrganizationKey)) %>% -mutate(iso2 = map_chr(p_key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -select(-p_key) %>% -glimpse() - -# combine tables and clean up -tt = rbind(pp,dd) %>% -arrange(name) %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark="\u202F")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark="\u202F")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark="\u202F")) %>% -select(Company, `Activity sector`, Country, Datasets, `Occurrence records`, `Data citations`) %>% -glimpse() - -# "https://www.gbif.org/publisher/f2429cd1-4d45-475c-852a-892024cb4aba[ARC - Arctic Research and Consulting DA]",{CONSULTING},{NO},1,8 914,63 - -# save as csv - -tt %>% - -# save .adoc -# save_file_table = "250-private-sector-table.adoc" - -# save and more cleanup -# sink(file = save_file_table, type = "output") -# tt %>% -# mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -# mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -# mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -# ascii::ascii(include.rownames = FALSE, digits = 0) -# sink() - -# totals table -# save_file_totals = "260-private-sector-totals.adoc" - -# sink(file = save_file_totals, type = "output") -# tt %>% -# summarise( -# Datasets = sum(Datasets), -# `Occurrence records` = sum(`Occurrence records`), -# `Data citations` = sum(`Data citations`) -# ) %>% -# mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -# mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -# mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -# ascii::ascii(include.rownames = FALSE, digits = 0) -# sink() diff --git a/.history/build-table-script/R/table_20240704120741.R b/.history/build-table-script/R/table_20240704120741.R deleted file mode 100644 index 30f461a..0000000 --- a/.history/build-table-script/R/table_20240704120741.R +++ /dev/null @@ -1,103 +0,0 @@ - -library(dplyr) -library(purrr) -library(gbifmt) # my library -# setwd("C:/Users/ftw712/Desktop/doc-private-sector-data-publishing/") - -# harvest publishers and datasets private sector publishers directly from machineTags - -# dataset -ds_mt = get_mt("privateSector.gbif.org",type="dataset",limit=500) |> -mutate(link = paste0("https://www.gbif.org/dataset/",uuid)) %>% -mutate(pd = "dataset") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -# publisher -pb_mt = get_mt("privateSector.gbif.org",type="organization",limit=500) |> -mutate(link = paste0("https://www.gbif.org/publisher/",uuid)) %>% -mutate(pd = "publisher") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -ss = list(ds_mt,pb_mt) |> -bind_rows() |> -glimpse() - -# don't need to read from source anymore since we read from machine tags -# ss = readr::read_csv("build-table-script/data/source.tsv") %>% -# mutate(pd = ifelse(grepl("publisher",link),"publisher","dataset")) %>% -# mutate(key = gsub("https://www.gbif.org/publisher/","",link)) %>% -# mutate(key = gsub("https://www.gbif.org/dataset/","",key)) %>% -# glimpse() - -gbif_country = rgbif::enumeration_country() %>% select(Country=title,iso2) %>% glimpse() - -pp = ss %>% -dplyr::filter(pd == "publisher") %>% -select(key,`Activity sector`) |> -mutate(name = map_chr(key,~rgbif::organizations(uuid=.x,limit=1)$data$title)) |> -mutate(`Occurrence records` = map_dbl(key,~ rgbif::occ_search(publishingOrg = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = map_dbl(key,~rgbif::dataset_search(publishingOrg= .x,limit=0)$meta$count)) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(publishingOrg = .x))) %>% -mutate(Company = paste0("https://www.gbif.org/publisher/",key,"[",name,"]")) %>% -mutate(iso2 = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -glimpse() - -rgbif::dataset_get(uuid="72e23311-b65a-46d0-bc07-ff0a251b47e1")$title - -dd = ss %>% -dplyr::filter(pd == "dataset") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::dataset_get(uuid=.x)$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ -rgbif::occ_search(datasetKey = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = 1) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(datasetKey = .x))) %>% -mutate(Company = paste0("(https://www.gbif.org/dataset/",key,")[",name,"]")) %>% -mutate(p_key = map_chr(key,~ rgbif::dataset_get(uuid=.x)$publishingOrganizationKey)) %>% -mutate(iso2 = map_chr(p_key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -select(-p_key) %>% -glimpse() - -# combine tables, clean up, save as csv -tt = rbind(pp,dd) %>% -arrange(name) %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark="\u202F")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark="\u202F")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark="\u202F")) %>% -select(Company, `Activity sector`, Country, Datasets, `Occurrence records`, `Data citations`) %>% -glimpse() - -# "https://www.gbif.org/publisher/f2429cd1-4d45-475c-852a-892024cb4aba[ARC - Arctic Research and Consulting DA]",{CONSULTING},{NO},1,8 914,63 - - -# save .adoc -# save_file_table = "250-private-sector-table.adoc" - -# save and more cleanup -# sink(file = save_file_table, type = "output") -# tt %>% -# mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -# mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -# mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -# ascii::ascii(include.rownames = FALSE, digits = 0) -# sink() - -# totals table -# save_file_totals = "260-private-sector-totals.adoc" - -# sink(file = save_file_totals, type = "output") -# tt %>% -# summarise( -# Datasets = sum(Datasets), -# `Occurrence records` = sum(`Occurrence records`), -# `Data citations` = sum(`Data citations`) -# ) %>% -# mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -# mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -# mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -# ascii::ascii(include.rownames = FALSE, digits = 0) -# sink() diff --git a/.history/build-table-script/R/table_20240704120832.R b/.history/build-table-script/R/table_20240704120832.R deleted file mode 100644 index 583e04e..0000000 --- a/.history/build-table-script/R/table_20240704120832.R +++ /dev/null @@ -1,106 +0,0 @@ - -library(dplyr) -library(purrr) -library(gbifmt) # my library -# setwd("C:/Users/ftw712/Desktop/doc-private-sector-data-publishing/") - -# harvest publishers and datasets private sector publishers directly from machineTags - -# dataset -ds_mt = get_mt("privateSector.gbif.org",type="dataset",limit=500) |> -mutate(link = paste0("https://www.gbif.org/dataset/",uuid)) %>% -mutate(pd = "dataset") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -# publisher -pb_mt = get_mt("privateSector.gbif.org",type="organization",limit=500) |> -mutate(link = paste0("https://www.gbif.org/publisher/",uuid)) %>% -mutate(pd = "publisher") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -ss = list(ds_mt,pb_mt) |> -bind_rows() |> -glimpse() - -# don't need to read from source anymore since we read from machine tags -# ss = readr::read_csv("build-table-script/data/source.tsv") %>% -# mutate(pd = ifelse(grepl("publisher",link),"publisher","dataset")) %>% -# mutate(key = gsub("https://www.gbif.org/publisher/","",link)) %>% -# mutate(key = gsub("https://www.gbif.org/dataset/","",key)) %>% -# glimpse() - -gbif_country = rgbif::enumeration_country() %>% select(Country=title,iso2) %>% glimpse() - -pp = ss %>% -dplyr::filter(pd == "publisher") %>% -select(key,`Activity sector`) |> -mutate(name = map_chr(key,~rgbif::organizations(uuid=.x,limit=1)$data$title)) |> -mutate(`Occurrence records` = map_dbl(key,~ rgbif::occ_search(publishingOrg = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = map_dbl(key,~rgbif::dataset_search(publishingOrg= .x,limit=0)$meta$count)) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(publishingOrg = .x))) %>% -mutate(Company = paste0("https://www.gbif.org/publisher/",key,"[",name,"]")) %>% -mutate(iso2 = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -glimpse() - -rgbif::dataset_get(uuid="72e23311-b65a-46d0-bc07-ff0a251b47e1")$title - -dd = ss %>% -dplyr::filter(pd == "dataset") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::dataset_get(uuid=.x)$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ -rgbif::occ_search(datasetKey = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = 1) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(datasetKey = .x))) %>% -mutate(Company = paste0("(https://www.gbif.org/dataset/",key,")[",name,"]")) %>% -mutate(p_key = map_chr(key,~ rgbif::dataset_get(uuid=.x)$publishingOrganizationKey)) %>% -mutate(iso2 = map_chr(p_key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -select(-p_key) %>% -glimpse() - -# combine tables and clean up -tt = rbind(pp,dd) %>% -arrange(name) %>% -select(Company, `Activity sector`, Country, Datasets, `Occurrence records`, `Data citations`) %>% -glimpse() - -# "https://www.gbif.org/publisher/f2429cd1-4d45-475c-852a-892024cb4aba[ARC - Arctic Research and Consulting DA]",{CONSULTING},{NO},1,8 914,63 - -# save as csv - -tt %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark="\u202F")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark="\u202F")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark="\u202F")) - -# save .adoc -# save_file_table = "250-private-sector-table.adoc" - -# save and more cleanup -# sink(file = save_file_table, type = "output") -# tt %>% -# mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -# mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -# mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -# ascii::ascii(include.rownames = FALSE, digits = 0) -# sink() - -# totals table -# save_file_totals = "260-private-sector-totals.adoc" - -# sink(file = save_file_totals, type = "output") -# tt %>% -# summarise( -# Datasets = sum(Datasets), -# `Occurrence records` = sum(`Occurrence records`), -# `Data citations` = sum(`Data citations`) -# ) %>% -# mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -# mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -# mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -# ascii::ascii(include.rownames = FALSE, digits = 0) -# sink() diff --git a/.history/build-table-script/R/table_20240704120836.R b/.history/build-table-script/R/table_20240704120836.R deleted file mode 100644 index 41e76ae..0000000 --- a/.history/build-table-script/R/table_20240704120836.R +++ /dev/null @@ -1,105 +0,0 @@ - -library(dplyr) -library(purrr) -library(gbifmt) # my library -# setwd("C:/Users/ftw712/Desktop/doc-private-sector-data-publishing/") - -# harvest publishers and datasets private sector publishers directly from machineTags - -# dataset -ds_mt = get_mt("privateSector.gbif.org",type="dataset",limit=500) |> -mutate(link = paste0("https://www.gbif.org/dataset/",uuid)) %>% -mutate(pd = "dataset") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -# publisher -pb_mt = get_mt("privateSector.gbif.org",type="organization",limit=500) |> -mutate(link = paste0("https://www.gbif.org/publisher/",uuid)) %>% -mutate(pd = "publisher") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -ss = list(ds_mt,pb_mt) |> -bind_rows() |> -glimpse() - -# don't need to read from source anymore since we read from machine tags -# ss = readr::read_csv("build-table-script/data/source.tsv") %>% -# mutate(pd = ifelse(grepl("publisher",link),"publisher","dataset")) %>% -# mutate(key = gsub("https://www.gbif.org/publisher/","",link)) %>% -# mutate(key = gsub("https://www.gbif.org/dataset/","",key)) %>% -# glimpse() - -gbif_country = rgbif::enumeration_country() %>% select(Country=title,iso2) %>% glimpse() - -pp = ss %>% -dplyr::filter(pd == "publisher") %>% -select(key,`Activity sector`) |> -mutate(name = map_chr(key,~rgbif::organizations(uuid=.x,limit=1)$data$title)) |> -mutate(`Occurrence records` = map_dbl(key,~ rgbif::occ_search(publishingOrg = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = map_dbl(key,~rgbif::dataset_search(publishingOrg= .x,limit=0)$meta$count)) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(publishingOrg = .x))) %>% -mutate(Company = paste0("https://www.gbif.org/publisher/",key,"[",name,"]")) %>% -mutate(iso2 = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -glimpse() - -rgbif::dataset_get(uuid="72e23311-b65a-46d0-bc07-ff0a251b47e1")$title - -dd = ss %>% -dplyr::filter(pd == "dataset") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::dataset_get(uuid=.x)$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ -rgbif::occ_search(datasetKey = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = 1) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(datasetKey = .x))) %>% -mutate(Company = paste0("(https://www.gbif.org/dataset/",key,")[",name,"]")) %>% -mutate(p_key = map_chr(key,~ rgbif::dataset_get(uuid=.x)$publishingOrganizationKey)) %>% -mutate(iso2 = map_chr(p_key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -select(-p_key) %>% -glimpse() - -# combine tables and clean up -tt = rbind(pp,dd) %>% -arrange(name) %>% -select(Company, `Activity sector`, Country, Datasets, `Occurrence records`, `Data citations`) %>% -glimpse() - -# "https://www.gbif.org/publisher/f2429cd1-4d45-475c-852a-892024cb4aba[ARC - Arctic Research and Consulting DA]",{CONSULTING},{NO},1,8 914,63 - -# save as csv -tt %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark="\u202F")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark="\u202F")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark="\u202F")) - -# save .adoc -# save_file_table = "250-private-sector-table.adoc" - -# save and more cleanup -# sink(file = save_file_table, type = "output") -# tt %>% -# mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -# mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -# mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -# ascii::ascii(include.rownames = FALSE, digits = 0) -# sink() - -# totals table -# save_file_totals = "260-private-sector-totals.adoc" - -# sink(file = save_file_totals, type = "output") -# tt %>% -# summarise( -# Datasets = sum(Datasets), -# `Occurrence records` = sum(`Occurrence records`), -# `Data citations` = sum(`Data citations`) -# ) %>% -# mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -# mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -# mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -# ascii::ascii(include.rownames = FALSE, digits = 0) -# sink() diff --git a/.history/build-table-script/R/table_20240704120941.R b/.history/build-table-script/R/table_20240704120941.R deleted file mode 100644 index f18c92e..0000000 --- a/.history/build-table-script/R/table_20240704120941.R +++ /dev/null @@ -1,106 +0,0 @@ - -library(dplyr) -library(purrr) -library(gbifmt) # my library -# setwd("C:/Users/ftw712/Desktop/doc-private-sector-data-publishing/") - -# harvest publishers and datasets private sector publishers directly from machineTags - -# dataset -ds_mt = get_mt("privateSector.gbif.org",type="dataset",limit=500) |> -mutate(link = paste0("https://www.gbif.org/dataset/",uuid)) %>% -mutate(pd = "dataset") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -# publisher -pb_mt = get_mt("privateSector.gbif.org",type="organization",limit=500) |> -mutate(link = paste0("https://www.gbif.org/publisher/",uuid)) %>% -mutate(pd = "publisher") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -ss = list(ds_mt,pb_mt) |> -bind_rows() |> -glimpse() - -# don't need to read from source anymore since we read from machine tags -# ss = readr::read_csv("build-table-script/data/source.tsv") %>% -# mutate(pd = ifelse(grepl("publisher",link),"publisher","dataset")) %>% -# mutate(key = gsub("https://www.gbif.org/publisher/","",link)) %>% -# mutate(key = gsub("https://www.gbif.org/dataset/","",key)) %>% -# glimpse() - -gbif_country = rgbif::enumeration_country() %>% select(Country=title,iso2) %>% glimpse() - -pp = ss %>% -dplyr::filter(pd == "publisher") %>% -select(key,`Activity sector`) |> -mutate(name = map_chr(key,~rgbif::organizations(uuid=.x,limit=1)$data$title)) |> -mutate(`Occurrence records` = map_dbl(key,~ rgbif::occ_search(publishingOrg = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = map_dbl(key,~rgbif::dataset_search(publishingOrg= .x,limit=0)$meta$count)) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(publishingOrg = .x))) %>% -mutate(Company = paste0("https://www.gbif.org/publisher/",key,"[",name,"]")) %>% -mutate(iso2 = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -glimpse() - -rgbif::dataset_get(uuid="72e23311-b65a-46d0-bc07-ff0a251b47e1")$title - -dd = ss %>% -dplyr::filter(pd == "dataset") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::dataset_get(uuid=.x)$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ -rgbif::occ_search(datasetKey = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = 1) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(datasetKey = .x))) %>% -mutate(Company = paste0("(https://www.gbif.org/dataset/",key,")[",name,"]")) %>% -mutate(p_key = map_chr(key,~ rgbif::dataset_get(uuid=.x)$publishingOrganizationKey)) %>% -mutate(iso2 = map_chr(p_key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -select(-p_key) %>% -glimpse() - -# combine tables and clean up -tt = rbind(pp,dd) %>% -arrange(name) %>% -select(Company, `Activity sector`, Country, Datasets, `Occurrence records`, `Data citations`) %>% -glimpse() - -# "https://www.gbif.org/publisher/f2429cd1-4d45-475c-852a-892024cb4aba[ARC - Arctic Research and Consulting DA]",{CONSULTING},{NO},1,8 914,63 - -# save as csv -tt %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark="\u202F")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark="\u202F")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark="\u202F")) -mutate(`Activity sector` = paste0("{",`Activity sector`,"}")) %>% - -# save .adoc -# save_file_table = "250-private-sector-table.adoc" - -# save and more cleanup -# sink(file = save_file_table, type = "output") -# tt %>% -# mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -# mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -# mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -# ascii::ascii(include.rownames = FALSE, digits = 0) -# sink() - -# totals table -# save_file_totals = "260-private-sector-totals.adoc" - -# sink(file = save_file_totals, type = "output") -# tt %>% -# summarise( -# Datasets = sum(Datasets), -# `Occurrence records` = sum(`Occurrence records`), -# `Data citations` = sum(`Data citations`) -# ) %>% -# mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -# mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -# mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -# ascii::ascii(include.rownames = FALSE, digits = 0) -# sink() diff --git a/.history/build-table-script/R/table_20240704120943.R b/.history/build-table-script/R/table_20240704120943.R deleted file mode 100644 index f18c92e..0000000 --- a/.history/build-table-script/R/table_20240704120943.R +++ /dev/null @@ -1,106 +0,0 @@ - -library(dplyr) -library(purrr) -library(gbifmt) # my library -# setwd("C:/Users/ftw712/Desktop/doc-private-sector-data-publishing/") - -# harvest publishers and datasets private sector publishers directly from machineTags - -# dataset -ds_mt = get_mt("privateSector.gbif.org",type="dataset",limit=500) |> -mutate(link = paste0("https://www.gbif.org/dataset/",uuid)) %>% -mutate(pd = "dataset") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -# publisher -pb_mt = get_mt("privateSector.gbif.org",type="organization",limit=500) |> -mutate(link = paste0("https://www.gbif.org/publisher/",uuid)) %>% -mutate(pd = "publisher") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -ss = list(ds_mt,pb_mt) |> -bind_rows() |> -glimpse() - -# don't need to read from source anymore since we read from machine tags -# ss = readr::read_csv("build-table-script/data/source.tsv") %>% -# mutate(pd = ifelse(grepl("publisher",link),"publisher","dataset")) %>% -# mutate(key = gsub("https://www.gbif.org/publisher/","",link)) %>% -# mutate(key = gsub("https://www.gbif.org/dataset/","",key)) %>% -# glimpse() - -gbif_country = rgbif::enumeration_country() %>% select(Country=title,iso2) %>% glimpse() - -pp = ss %>% -dplyr::filter(pd == "publisher") %>% -select(key,`Activity sector`) |> -mutate(name = map_chr(key,~rgbif::organizations(uuid=.x,limit=1)$data$title)) |> -mutate(`Occurrence records` = map_dbl(key,~ rgbif::occ_search(publishingOrg = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = map_dbl(key,~rgbif::dataset_search(publishingOrg= .x,limit=0)$meta$count)) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(publishingOrg = .x))) %>% -mutate(Company = paste0("https://www.gbif.org/publisher/",key,"[",name,"]")) %>% -mutate(iso2 = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -glimpse() - -rgbif::dataset_get(uuid="72e23311-b65a-46d0-bc07-ff0a251b47e1")$title - -dd = ss %>% -dplyr::filter(pd == "dataset") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::dataset_get(uuid=.x)$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ -rgbif::occ_search(datasetKey = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = 1) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(datasetKey = .x))) %>% -mutate(Company = paste0("(https://www.gbif.org/dataset/",key,")[",name,"]")) %>% -mutate(p_key = map_chr(key,~ rgbif::dataset_get(uuid=.x)$publishingOrganizationKey)) %>% -mutate(iso2 = map_chr(p_key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -select(-p_key) %>% -glimpse() - -# combine tables and clean up -tt = rbind(pp,dd) %>% -arrange(name) %>% -select(Company, `Activity sector`, Country, Datasets, `Occurrence records`, `Data citations`) %>% -glimpse() - -# "https://www.gbif.org/publisher/f2429cd1-4d45-475c-852a-892024cb4aba[ARC - Arctic Research and Consulting DA]",{CONSULTING},{NO},1,8 914,63 - -# save as csv -tt %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark="\u202F")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark="\u202F")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark="\u202F")) -mutate(`Activity sector` = paste0("{",`Activity sector`,"}")) %>% - -# save .adoc -# save_file_table = "250-private-sector-table.adoc" - -# save and more cleanup -# sink(file = save_file_table, type = "output") -# tt %>% -# mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -# mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -# mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -# ascii::ascii(include.rownames = FALSE, digits = 0) -# sink() - -# totals table -# save_file_totals = "260-private-sector-totals.adoc" - -# sink(file = save_file_totals, type = "output") -# tt %>% -# summarise( -# Datasets = sum(Datasets), -# `Occurrence records` = sum(`Occurrence records`), -# `Data citations` = sum(`Data citations`) -# ) %>% -# mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -# mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -# mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -# ascii::ascii(include.rownames = FALSE, digits = 0) -# sink() diff --git a/.history/build-table-script/R/table_20240704120945.R b/.history/build-table-script/R/table_20240704120945.R deleted file mode 100644 index 9de4797..0000000 --- a/.history/build-table-script/R/table_20240704120945.R +++ /dev/null @@ -1,106 +0,0 @@ - -library(dplyr) -library(purrr) -library(gbifmt) # my library -# setwd("C:/Users/ftw712/Desktop/doc-private-sector-data-publishing/") - -# harvest publishers and datasets private sector publishers directly from machineTags - -# dataset -ds_mt = get_mt("privateSector.gbif.org",type="dataset",limit=500) |> -mutate(link = paste0("https://www.gbif.org/dataset/",uuid)) %>% -mutate(pd = "dataset") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -# publisher -pb_mt = get_mt("privateSector.gbif.org",type="organization",limit=500) |> -mutate(link = paste0("https://www.gbif.org/publisher/",uuid)) %>% -mutate(pd = "publisher") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -ss = list(ds_mt,pb_mt) |> -bind_rows() |> -glimpse() - -# don't need to read from source anymore since we read from machine tags -# ss = readr::read_csv("build-table-script/data/source.tsv") %>% -# mutate(pd = ifelse(grepl("publisher",link),"publisher","dataset")) %>% -# mutate(key = gsub("https://www.gbif.org/publisher/","",link)) %>% -# mutate(key = gsub("https://www.gbif.org/dataset/","",key)) %>% -# glimpse() - -gbif_country = rgbif::enumeration_country() %>% select(Country=title,iso2) %>% glimpse() - -pp = ss %>% -dplyr::filter(pd == "publisher") %>% -select(key,`Activity sector`) |> -mutate(name = map_chr(key,~rgbif::organizations(uuid=.x,limit=1)$data$title)) |> -mutate(`Occurrence records` = map_dbl(key,~ rgbif::occ_search(publishingOrg = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = map_dbl(key,~rgbif::dataset_search(publishingOrg= .x,limit=0)$meta$count)) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(publishingOrg = .x))) %>% -mutate(Company = paste0("https://www.gbif.org/publisher/",key,"[",name,"]")) %>% -mutate(iso2 = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -glimpse() - -rgbif::dataset_get(uuid="72e23311-b65a-46d0-bc07-ff0a251b47e1")$title - -dd = ss %>% -dplyr::filter(pd == "dataset") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::dataset_get(uuid=.x)$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ -rgbif::occ_search(datasetKey = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = 1) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(datasetKey = .x))) %>% -mutate(Company = paste0("(https://www.gbif.org/dataset/",key,")[",name,"]")) %>% -mutate(p_key = map_chr(key,~ rgbif::dataset_get(uuid=.x)$publishingOrganizationKey)) %>% -mutate(iso2 = map_chr(p_key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -select(-p_key) %>% -glimpse() - -# combine tables and clean up -tt = rbind(pp,dd) %>% -arrange(name) %>% -select(Company, `Activity sector`, Country, Datasets, `Occurrence records`, `Data citations`) %>% -glimpse() - -# "https://www.gbif.org/publisher/f2429cd1-4d45-475c-852a-892024cb4aba[ARC - Arctic Research and Consulting DA]",{CONSULTING},{NO},1,8 914,63 - -# save as csv -tt %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark="\u202F")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark="\u202F")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark="\u202F")) -mutate(`Activity sector` = paste0("{",`Activity sector`,"}")) - -# save .adoc -# save_file_table = "250-private-sector-table.adoc" - -# save and more cleanup -# sink(file = save_file_table, type = "output") -# tt %>% -# mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -# mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -# mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -# ascii::ascii(include.rownames = FALSE, digits = 0) -# sink() - -# totals table -# save_file_totals = "260-private-sector-totals.adoc" - -# sink(file = save_file_totals, type = "output") -# tt %>% -# summarise( -# Datasets = sum(Datasets), -# `Occurrence records` = sum(`Occurrence records`), -# `Data citations` = sum(`Data citations`) -# ) %>% -# mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -# mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -# mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -# ascii::ascii(include.rownames = FALSE, digits = 0) -# sink() diff --git a/.history/build-table-script/R/table_20240704121033.R b/.history/build-table-script/R/table_20240704121033.R deleted file mode 100644 index 1374d5f..0000000 --- a/.history/build-table-script/R/table_20240704121033.R +++ /dev/null @@ -1,106 +0,0 @@ - -library(dplyr) -library(purrr) -library(gbifmt) # my library -# setwd("C:/Users/ftw712/Desktop/doc-private-sector-data-publishing/") - -# harvest publishers and datasets private sector publishers directly from machineTags - -# dataset -ds_mt = get_mt("privateSector.gbif.org",type="dataset",limit=500) |> -mutate(link = paste0("https://www.gbif.org/dataset/",uuid)) %>% -mutate(pd = "dataset") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -# publisher -pb_mt = get_mt("privateSector.gbif.org",type="organization",limit=500) |> -mutate(link = paste0("https://www.gbif.org/publisher/",uuid)) %>% -mutate(pd = "publisher") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -ss = list(ds_mt,pb_mt) |> -bind_rows() |> -glimpse() - -# don't need to read from source anymore since we read from machine tags -# ss = readr::read_csv("build-table-script/data/source.tsv") %>% -# mutate(pd = ifelse(grepl("publisher",link),"publisher","dataset")) %>% -# mutate(key = gsub("https://www.gbif.org/publisher/","",link)) %>% -# mutate(key = gsub("https://www.gbif.org/dataset/","",key)) %>% -# glimpse() - -gbif_country = rgbif::enumeration_country() %>% select(Country=title,iso2) %>% glimpse() - -pp = ss %>% -dplyr::filter(pd == "publisher") %>% -select(key,`Activity sector`) |> -mutate(name = map_chr(key,~rgbif::organizations(uuid=.x,limit=1)$data$title)) |> -mutate(`Occurrence records` = map_dbl(key,~ rgbif::occ_search(publishingOrg = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = map_dbl(key,~rgbif::dataset_search(publishingOrg= .x,limit=0)$meta$count)) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(publishingOrg = .x))) %>% -mutate(Company = paste0("https://www.gbif.org/publisher/",key,"[",name,"]")) %>% -mutate(iso2 = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -glimpse() - -rgbif::dataset_get(uuid="72e23311-b65a-46d0-bc07-ff0a251b47e1")$title - -dd = ss %>% -dplyr::filter(pd == "dataset") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::dataset_get(uuid=.x)$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ -rgbif::occ_search(datasetKey = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = 1) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(datasetKey = .x))) %>% -mutate(Company = paste0("(https://www.gbif.org/dataset/",key,")[",name,"]")) %>% -mutate(p_key = map_chr(key,~ rgbif::dataset_get(uuid=.x)$publishingOrganizationKey)) %>% -mutate(iso2 = map_chr(p_key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -select(-p_key) %>% -glimpse() - -# combine tables and clean up -tt = rbind(pp,dd) %>% -arrange(name) %>% -select(Company, `Activity sector`, Country, Datasets, `Occurrence records`, `Data citations`) %>% -glimpse() - -# "https://www.gbif.org/publisher/f2429cd1-4d45-475c-852a-892024cb4aba[ARC - Arctic Research and Consulting DA]",{CONSULTING},{NO},1,8 914,63 - -# save as csv -tt %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark="\u202F")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark="\u202F")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark="\u202F")) %>% -mutate(`Activity sector` = paste0("{",`Activity sector`,"}")) - -# save .adoc -# save_file_table = "250-private-sector-table.adoc" - -# save and more cleanup -# sink(file = save_file_table, type = "output") -# tt %>% -# mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -# mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -# mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -# ascii::ascii(include.rownames = FALSE, digits = 0) -# sink() - -# totals table -# save_file_totals = "260-private-sector-totals.adoc" - -# sink(file = save_file_totals, type = "output") -# tt %>% -# summarise( -# Datasets = sum(Datasets), -# `Occurrence records` = sum(`Occurrence records`), -# `Data citations` = sum(`Data citations`) -# ) %>% -# mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -# mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -# mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -# ascii::ascii(include.rownames = FALSE, digits = 0) -# sink() diff --git a/.history/build-table-script/R/table_20240704121414.R b/.history/build-table-script/R/table_20240704121414.R deleted file mode 100644 index e7d0192..0000000 --- a/.history/build-table-script/R/table_20240704121414.R +++ /dev/null @@ -1,108 +0,0 @@ - -library(dplyr) -library(purrr) -library(gbifmt) # my library -# setwd("C:/Users/ftw712/Desktop/doc-private-sector-data-publishing/") - -# harvest publishers and datasets private sector publishers directly from machineTags - -# dataset -ds_mt = get_mt("privateSector.gbif.org",type="dataset",limit=500) |> -mutate(link = paste0("https://www.gbif.org/dataset/",uuid)) %>% -mutate(pd = "dataset") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -# publisher -pb_mt = get_mt("privateSector.gbif.org",type="organization",limit=500) |> -mutate(link = paste0("https://www.gbif.org/publisher/",uuid)) %>% -mutate(pd = "publisher") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -ss = list(ds_mt,pb_mt) |> -bind_rows() |> -glimpse() - -# don't need to read from source anymore since we read from machine tags -# ss = readr::read_csv("build-table-script/data/source.tsv") %>% -# mutate(pd = ifelse(grepl("publisher",link),"publisher","dataset")) %>% -# mutate(key = gsub("https://www.gbif.org/publisher/","",link)) %>% -# mutate(key = gsub("https://www.gbif.org/dataset/","",key)) %>% -# glimpse() - -gbif_country = rgbif::enumeration_country() %>% select(Country=title,iso2) %>% glimpse() - -pp = ss %>% -dplyr::filter(pd == "publisher") %>% -select(key,`Activity sector`) |> -mutate(name = map_chr(key,~rgbif::organizations(uuid=.x,limit=1)$data$title)) |> -mutate(`Occurrence records` = map_dbl(key,~ rgbif::occ_search(publishingOrg = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = map_dbl(key,~rgbif::dataset_search(publishingOrg= .x,limit=0)$meta$count)) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(publishingOrg = .x))) %>% -mutate(Company = paste0("https://www.gbif.org/publisher/",key,"[",name,"]")) %>% -mutate(iso2 = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -glimpse() - -rgbif::dataset_get(uuid="72e23311-b65a-46d0-bc07-ff0a251b47e1")$title - -dd = ss %>% -dplyr::filter(pd == "dataset") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::dataset_get(uuid=.x)$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ -rgbif::occ_search(datasetKey = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = 1) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(datasetKey = .x))) %>% -mutate(Company = paste0("(https://www.gbif.org/dataset/",key,")[",name,"]")) %>% -mutate(p_key = map_chr(key,~ rgbif::dataset_get(uuid=.x)$publishingOrganizationKey)) %>% -mutate(iso2 = map_chr(p_key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -mutate(country = iso2) %>% -select(-p_key) %>% -glimpse() - -# combine tables and clean up -tt = rbind(pp,dd) %>% -arrange(name) %>% -select(Company, `Activity sector`, Country, Datasets, `Occurrence records`, `Data citations`) %>% -glimpse() - -# "https://www.gbif.org/publisher/f2429cd1-4d45-475c-852a-892024cb4aba[ARC - Arctic Research and Consulting DA]",{CONSULTING},{NO},1,8 914,63 - -# save as csv -tt %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark="\u202F")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark="\u202F")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark="\u202F")) %>% -mutate(`Activity sector` = paste0("{",`Activity sector`,"}")) %>% -mutate(`Country` = paste0("{",`Country`,"}")) %>% - -# save .adoc -# save_file_table = "250-private-sector-table.adoc" - -# save and more cleanup -# sink(file = save_file_table, type = "output") -# tt %>% -# mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -# mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -# mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -# ascii::ascii(include.rownames = FALSE, digits = 0) -# sink() - -# totals table -# save_file_totals = "260-private-sector-totals.adoc" - -# sink(file = save_file_totals, type = "output") -# tt %>% -# summarise( -# Datasets = sum(Datasets), -# `Occurrence records` = sum(`Occurrence records`), -# `Data citations` = sum(`Data citations`) -# ) %>% -# mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -# mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -# mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -# ascii::ascii(include.rownames = FALSE, digits = 0) -# sink() diff --git a/.history/build-table-script/R/table_20240704121416.R b/.history/build-table-script/R/table_20240704121416.R deleted file mode 100644 index e7d0192..0000000 --- a/.history/build-table-script/R/table_20240704121416.R +++ /dev/null @@ -1,108 +0,0 @@ - -library(dplyr) -library(purrr) -library(gbifmt) # my library -# setwd("C:/Users/ftw712/Desktop/doc-private-sector-data-publishing/") - -# harvest publishers and datasets private sector publishers directly from machineTags - -# dataset -ds_mt = get_mt("privateSector.gbif.org",type="dataset",limit=500) |> -mutate(link = paste0("https://www.gbif.org/dataset/",uuid)) %>% -mutate(pd = "dataset") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -# publisher -pb_mt = get_mt("privateSector.gbif.org",type="organization",limit=500) |> -mutate(link = paste0("https://www.gbif.org/publisher/",uuid)) %>% -mutate(pd = "publisher") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -ss = list(ds_mt,pb_mt) |> -bind_rows() |> -glimpse() - -# don't need to read from source anymore since we read from machine tags -# ss = readr::read_csv("build-table-script/data/source.tsv") %>% -# mutate(pd = ifelse(grepl("publisher",link),"publisher","dataset")) %>% -# mutate(key = gsub("https://www.gbif.org/publisher/","",link)) %>% -# mutate(key = gsub("https://www.gbif.org/dataset/","",key)) %>% -# glimpse() - -gbif_country = rgbif::enumeration_country() %>% select(Country=title,iso2) %>% glimpse() - -pp = ss %>% -dplyr::filter(pd == "publisher") %>% -select(key,`Activity sector`) |> -mutate(name = map_chr(key,~rgbif::organizations(uuid=.x,limit=1)$data$title)) |> -mutate(`Occurrence records` = map_dbl(key,~ rgbif::occ_search(publishingOrg = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = map_dbl(key,~rgbif::dataset_search(publishingOrg= .x,limit=0)$meta$count)) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(publishingOrg = .x))) %>% -mutate(Company = paste0("https://www.gbif.org/publisher/",key,"[",name,"]")) %>% -mutate(iso2 = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -glimpse() - -rgbif::dataset_get(uuid="72e23311-b65a-46d0-bc07-ff0a251b47e1")$title - -dd = ss %>% -dplyr::filter(pd == "dataset") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::dataset_get(uuid=.x)$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ -rgbif::occ_search(datasetKey = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = 1) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(datasetKey = .x))) %>% -mutate(Company = paste0("(https://www.gbif.org/dataset/",key,")[",name,"]")) %>% -mutate(p_key = map_chr(key,~ rgbif::dataset_get(uuid=.x)$publishingOrganizationKey)) %>% -mutate(iso2 = map_chr(p_key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -mutate(country = iso2) %>% -select(-p_key) %>% -glimpse() - -# combine tables and clean up -tt = rbind(pp,dd) %>% -arrange(name) %>% -select(Company, `Activity sector`, Country, Datasets, `Occurrence records`, `Data citations`) %>% -glimpse() - -# "https://www.gbif.org/publisher/f2429cd1-4d45-475c-852a-892024cb4aba[ARC - Arctic Research and Consulting DA]",{CONSULTING},{NO},1,8 914,63 - -# save as csv -tt %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark="\u202F")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark="\u202F")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark="\u202F")) %>% -mutate(`Activity sector` = paste0("{",`Activity sector`,"}")) %>% -mutate(`Country` = paste0("{",`Country`,"}")) %>% - -# save .adoc -# save_file_table = "250-private-sector-table.adoc" - -# save and more cleanup -# sink(file = save_file_table, type = "output") -# tt %>% -# mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -# mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -# mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -# ascii::ascii(include.rownames = FALSE, digits = 0) -# sink() - -# totals table -# save_file_totals = "260-private-sector-totals.adoc" - -# sink(file = save_file_totals, type = "output") -# tt %>% -# summarise( -# Datasets = sum(Datasets), -# `Occurrence records` = sum(`Occurrence records`), -# `Data citations` = sum(`Data citations`) -# ) %>% -# mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -# mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -# mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -# ascii::ascii(include.rownames = FALSE, digits = 0) -# sink() diff --git a/.history/build-table-script/R/table_20240704121422.R b/.history/build-table-script/R/table_20240704121422.R deleted file mode 100644 index 106020c..0000000 --- a/.history/build-table-script/R/table_20240704121422.R +++ /dev/null @@ -1,108 +0,0 @@ - -library(dplyr) -library(purrr) -library(gbifmt) # my library -# setwd("C:/Users/ftw712/Desktop/doc-private-sector-data-publishing/") - -# harvest publishers and datasets private sector publishers directly from machineTags - -# dataset -ds_mt = get_mt("privateSector.gbif.org",type="dataset",limit=500) |> -mutate(link = paste0("https://www.gbif.org/dataset/",uuid)) %>% -mutate(pd = "dataset") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -# publisher -pb_mt = get_mt("privateSector.gbif.org",type="organization",limit=500) |> -mutate(link = paste0("https://www.gbif.org/publisher/",uuid)) %>% -mutate(pd = "publisher") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -ss = list(ds_mt,pb_mt) |> -bind_rows() |> -glimpse() - -# don't need to read from source anymore since we read from machine tags -# ss = readr::read_csv("build-table-script/data/source.tsv") %>% -# mutate(pd = ifelse(grepl("publisher",link),"publisher","dataset")) %>% -# mutate(key = gsub("https://www.gbif.org/publisher/","",link)) %>% -# mutate(key = gsub("https://www.gbif.org/dataset/","",key)) %>% -# glimpse() - -gbif_country = rgbif::enumeration_country() %>% select(Country=title,iso2) %>% glimpse() - -pp = ss %>% -dplyr::filter(pd == "publisher") %>% -select(key,`Activity sector`) |> -mutate(name = map_chr(key,~rgbif::organizations(uuid=.x,limit=1)$data$title)) |> -mutate(`Occurrence records` = map_dbl(key,~ rgbif::occ_search(publishingOrg = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = map_dbl(key,~rgbif::dataset_search(publishingOrg= .x,limit=0)$meta$count)) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(publishingOrg = .x))) %>% -mutate(Company = paste0("https://www.gbif.org/publisher/",key,"[",name,"]")) %>% -mutate(iso2 = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -glimpse() - -rgbif::dataset_get(uuid="72e23311-b65a-46d0-bc07-ff0a251b47e1")$title - -dd = ss %>% -dplyr::filter(pd == "dataset") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::dataset_get(uuid=.x)$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ -rgbif::occ_search(datasetKey = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = 1) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(datasetKey = .x))) %>% -mutate(Company = paste0("(https://www.gbif.org/dataset/",key,")[",name,"]")) %>% -mutate(p_key = map_chr(key,~ rgbif::dataset_get(uuid=.x)$publishingOrganizationKey)) %>% -mutate(iso2 = map_chr(p_key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -mutate(country = iso2) %>% -select(-p_key) %>% -glimpse() - -# combine tables and clean up -tt = rbind(pp,dd) %>% -arrange(name) %>% -select(Company, `Activity sector`, Country, Datasets, `Occurrence records`, `Data citations`) %>% -glimpse() - -# "https://www.gbif.org/publisher/f2429cd1-4d45-475c-852a-892024cb4aba[ARC - Arctic Research and Consulting DA]",{CONSULTING},{NO},1,8 914,63 - -# save as csv -tt %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark="\u202F")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark="\u202F")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark="\u202F")) %>% -mutate(`Activity sector` = paste0("{",`Activity sector`,"}")) %>% -mutate(`Country` = paste0("{",`Country`,"}")) - -# save .adoc -# save_file_table = "250-private-sector-table.adoc" - -# save and more cleanup -# sink(file = save_file_table, type = "output") -# tt %>% -# mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -# mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -# mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -# ascii::ascii(include.rownames = FALSE, digits = 0) -# sink() - -# totals table -# save_file_totals = "260-private-sector-totals.adoc" - -# sink(file = save_file_totals, type = "output") -# tt %>% -# summarise( -# Datasets = sum(Datasets), -# `Occurrence records` = sum(`Occurrence records`), -# `Data citations` = sum(`Data citations`) -# ) %>% -# mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -# mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -# mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -# ascii::ascii(include.rownames = FALSE, digits = 0) -# sink() diff --git a/.history/build-table-script/R/table_20240704121423.R b/.history/build-table-script/R/table_20240704121423.R deleted file mode 100644 index 106020c..0000000 --- a/.history/build-table-script/R/table_20240704121423.R +++ /dev/null @@ -1,108 +0,0 @@ - -library(dplyr) -library(purrr) -library(gbifmt) # my library -# setwd("C:/Users/ftw712/Desktop/doc-private-sector-data-publishing/") - -# harvest publishers and datasets private sector publishers directly from machineTags - -# dataset -ds_mt = get_mt("privateSector.gbif.org",type="dataset",limit=500) |> -mutate(link = paste0("https://www.gbif.org/dataset/",uuid)) %>% -mutate(pd = "dataset") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -# publisher -pb_mt = get_mt("privateSector.gbif.org",type="organization",limit=500) |> -mutate(link = paste0("https://www.gbif.org/publisher/",uuid)) %>% -mutate(pd = "publisher") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -ss = list(ds_mt,pb_mt) |> -bind_rows() |> -glimpse() - -# don't need to read from source anymore since we read from machine tags -# ss = readr::read_csv("build-table-script/data/source.tsv") %>% -# mutate(pd = ifelse(grepl("publisher",link),"publisher","dataset")) %>% -# mutate(key = gsub("https://www.gbif.org/publisher/","",link)) %>% -# mutate(key = gsub("https://www.gbif.org/dataset/","",key)) %>% -# glimpse() - -gbif_country = rgbif::enumeration_country() %>% select(Country=title,iso2) %>% glimpse() - -pp = ss %>% -dplyr::filter(pd == "publisher") %>% -select(key,`Activity sector`) |> -mutate(name = map_chr(key,~rgbif::organizations(uuid=.x,limit=1)$data$title)) |> -mutate(`Occurrence records` = map_dbl(key,~ rgbif::occ_search(publishingOrg = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = map_dbl(key,~rgbif::dataset_search(publishingOrg= .x,limit=0)$meta$count)) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(publishingOrg = .x))) %>% -mutate(Company = paste0("https://www.gbif.org/publisher/",key,"[",name,"]")) %>% -mutate(iso2 = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -glimpse() - -rgbif::dataset_get(uuid="72e23311-b65a-46d0-bc07-ff0a251b47e1")$title - -dd = ss %>% -dplyr::filter(pd == "dataset") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::dataset_get(uuid=.x)$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ -rgbif::occ_search(datasetKey = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = 1) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(datasetKey = .x))) %>% -mutate(Company = paste0("(https://www.gbif.org/dataset/",key,")[",name,"]")) %>% -mutate(p_key = map_chr(key,~ rgbif::dataset_get(uuid=.x)$publishingOrganizationKey)) %>% -mutate(iso2 = map_chr(p_key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -mutate(country = iso2) %>% -select(-p_key) %>% -glimpse() - -# combine tables and clean up -tt = rbind(pp,dd) %>% -arrange(name) %>% -select(Company, `Activity sector`, Country, Datasets, `Occurrence records`, `Data citations`) %>% -glimpse() - -# "https://www.gbif.org/publisher/f2429cd1-4d45-475c-852a-892024cb4aba[ARC - Arctic Research and Consulting DA]",{CONSULTING},{NO},1,8 914,63 - -# save as csv -tt %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark="\u202F")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark="\u202F")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark="\u202F")) %>% -mutate(`Activity sector` = paste0("{",`Activity sector`,"}")) %>% -mutate(`Country` = paste0("{",`Country`,"}")) - -# save .adoc -# save_file_table = "250-private-sector-table.adoc" - -# save and more cleanup -# sink(file = save_file_table, type = "output") -# tt %>% -# mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -# mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -# mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -# ascii::ascii(include.rownames = FALSE, digits = 0) -# sink() - -# totals table -# save_file_totals = "260-private-sector-totals.adoc" - -# sink(file = save_file_totals, type = "output") -# tt %>% -# summarise( -# Datasets = sum(Datasets), -# `Occurrence records` = sum(`Occurrence records`), -# `Data citations` = sum(`Data citations`) -# ) %>% -# mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -# mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -# mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -# ascii::ascii(include.rownames = FALSE, digits = 0) -# sink() diff --git a/.history/build-table-script/R/table_20240704121632.R b/.history/build-table-script/R/table_20240704121632.R deleted file mode 100644 index d9d53de..0000000 --- a/.history/build-table-script/R/table_20240704121632.R +++ /dev/null @@ -1,108 +0,0 @@ - -library(dplyr) -library(purrr) -library(gbifmt) # my library -# setwd("C:/Users/ftw712/Desktop/doc-private-sector-data-publishing/") - -# harvest publishers and datasets private sector publishers directly from machineTags - -# dataset -ds_mt = get_mt("privateSector.gbif.org",type="dataset",limit=500) |> -mutate(link = paste0("https://www.gbif.org/dataset/",uuid)) %>% -mutate(pd = "dataset") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -# publisher -pb_mt = get_mt("privateSector.gbif.org",type="organization",limit=500) |> -mutate(link = paste0("https://www.gbif.org/publisher/",uuid)) %>% -mutate(pd = "publisher") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -ss = list(ds_mt,pb_mt) |> -bind_rows() |> -glimpse() - -# don't need to read from source anymore since we read from machine tags -# ss = readr::read_csv("build-table-script/data/source.tsv") %>% -# mutate(pd = ifelse(grepl("publisher",link),"publisher","dataset")) %>% -# mutate(key = gsub("https://www.gbif.org/publisher/","",link)) %>% -# mutate(key = gsub("https://www.gbif.org/dataset/","",key)) %>% -# glimpse() - -gbif_country = rgbif::enumeration_country() %>% select(Country=title,iso2) %>% glimpse() - -pp = ss %>% -dplyr::filter(pd == "publisher") %>% -select(key,`Activity sector`) |> -mutate(name = map_chr(key,~rgbif::organizations(uuid=.x,limit=1)$data$title)) |> -mutate(`Occurrence records` = map_dbl(key,~ rgbif::occ_search(publishingOrg = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = map_dbl(key,~rgbif::dataset_search(publishingOrg= .x,limit=0)$meta$count)) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(publishingOrg = .x))) %>% -mutate(Company = paste0("https://www.gbif.org/publisher/",key,"[",name,"]")) %>% -mutate(iso2 = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -glimpse() - -rgbif::dataset_get(uuid="72e23311-b65a-46d0-bc07-ff0a251b47e1")$title - -dd = ss %>% -dplyr::filter(pd == "dataset") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::dataset_get(uuid=.x)$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ -rgbif::occ_search(datasetKey = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = 1) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(datasetKey = .x))) %>% -mutate(Company = paste0("(https://www.gbif.org/dataset/",key,")[",name,"]")) %>% -mutate(p_key = map_chr(key,~ rgbif::dataset_get(uuid=.x)$publishingOrganizationKey)) %>% -mutate(iso2 = map_chr(p_key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -glimpse() -# mutate(country = iso2) %>% -# select(-p_key) %>% - -# combine tables and clean up -tt = rbind(pp,dd) %>% -arrange(name) %>% -select(Company, `Activity sector`, Country, Datasets, `Occurrence records`, `Data citations`) %>% -glimpse() - -# "https://www.gbif.org/publisher/f2429cd1-4d45-475c-852a-892024cb4aba[ARC - Arctic Research and Consulting DA]",{CONSULTING},{NO},1,8 914,63 - -# save as csv -tt %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark="\u202F")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark="\u202F")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark="\u202F")) %>% -mutate(`Activity sector` = paste0("{",`Activity sector`,"}")) %>% -mutate(`Country` = paste0("{",`Country`,"}")) - -# save .adoc -# save_file_table = "250-private-sector-table.adoc" - -# save and more cleanup -# sink(file = save_file_table, type = "output") -# tt %>% -# mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -# mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -# mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -# ascii::ascii(include.rownames = FALSE, digits = 0) -# sink() - -# totals table -# save_file_totals = "260-private-sector-totals.adoc" - -# sink(file = save_file_totals, type = "output") -# tt %>% -# summarise( -# Datasets = sum(Datasets), -# `Occurrence records` = sum(`Occurrence records`), -# `Data citations` = sum(`Data citations`) -# ) %>% -# mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -# mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -# mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -# ascii::ascii(include.rownames = FALSE, digits = 0) -# sink() diff --git a/.history/build-table-script/R/table_20240704121652.R b/.history/build-table-script/R/table_20240704121652.R deleted file mode 100644 index 92d4c79..0000000 --- a/.history/build-table-script/R/table_20240704121652.R +++ /dev/null @@ -1,106 +0,0 @@ - -library(dplyr) -library(purrr) -library(gbifmt) # my library -# setwd("C:/Users/ftw712/Desktop/doc-private-sector-data-publishing/") - -# harvest publishers and datasets private sector publishers directly from machineTags - -# dataset -ds_mt = get_mt("privateSector.gbif.org",type="dataset",limit=500) |> -mutate(link = paste0("https://www.gbif.org/dataset/",uuid)) %>% -mutate(pd = "dataset") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -# publisher -pb_mt = get_mt("privateSector.gbif.org",type="organization",limit=500) |> -mutate(link = paste0("https://www.gbif.org/publisher/",uuid)) %>% -mutate(pd = "publisher") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -ss = list(ds_mt,pb_mt) |> -bind_rows() |> -glimpse() - -# don't need to read from source anymore since we read from machine tags -# ss = readr::read_csv("build-table-script/data/source.tsv") %>% -# mutate(pd = ifelse(grepl("publisher",link),"publisher","dataset")) %>% -# mutate(key = gsub("https://www.gbif.org/publisher/","",link)) %>% -# mutate(key = gsub("https://www.gbif.org/dataset/","",key)) %>% -# glimpse() - -gbif_country = rgbif::enumeration_country() %>% select(Country=title,iso2) %>% glimpse() - -pp = ss %>% -dplyr::filter(pd == "publisher") %>% -select(key,`Activity sector`) |> -mutate(name = map_chr(key,~rgbif::organizations(uuid=.x,limit=1)$data$title)) |> -mutate(`Occurrence records` = map_dbl(key,~ rgbif::occ_search(publishingOrg = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = map_dbl(key,~rgbif::dataset_search(publishingOrg= .x,limit=0)$meta$count)) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(publishingOrg = .x))) %>% -mutate(Company = paste0("https://www.gbif.org/publisher/",key,"[",name,"]")) %>% -mutate(iso2 = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -glimpse() - -dd = ss %>% -dplyr::filter(pd == "dataset") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::dataset_get(uuid=.x)$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ -rgbif::occ_search(datasetKey = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = 1) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(datasetKey = .x))) %>% -mutate(Company = paste0("(https://www.gbif.org/dataset/",key,")[",name,"]")) %>% -mutate(p_key = map_chr(key,~ rgbif::dataset_get(uuid=.x)$publishingOrganizationKey)) %>% -mutate(iso2 = map_chr(p_key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -glimpse() -# mutate(country = iso2) %>% -# select(-p_key) %>% - -# combine tables and clean up -tt = rbind(pp,dd) %>% -arrange(name) %>% -select(Company, `Activity sector`, Country, Datasets, `Occurrence records`, `Data citations`) %>% -glimpse() - -# "https://www.gbif.org/publisher/f2429cd1-4d45-475c-852a-892024cb4aba[ARC - Arctic Research and Consulting DA]",{CONSULTING},{NO},1,8 914,63 - -# save as csv -tt %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark="\u202F")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark="\u202F")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark="\u202F")) %>% -mutate(`Activity sector` = paste0("{",`Activity sector`,"}")) %>% -mutate(`Country` = paste0("{",`Country`,"}")) - -# save .adoc -# save_file_table = "250-private-sector-table.adoc" - -# save and more cleanup -# sink(file = save_file_table, type = "output") -# tt %>% -# mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -# mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -# mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -# ascii::ascii(include.rownames = FALSE, digits = 0) -# sink() - -# totals table -# save_file_totals = "260-private-sector-totals.adoc" - -# sink(file = save_file_totals, type = "output") -# tt %>% -# summarise( -# Datasets = sum(Datasets), -# `Occurrence records` = sum(`Occurrence records`), -# `Data citations` = sum(`Data citations`) -# ) %>% -# mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -# mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -# mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -# ascii::ascii(include.rownames = FALSE, digits = 0) -# sink() diff --git a/.history/build-table-script/R/table_20240704121703.R b/.history/build-table-script/R/table_20240704121703.R deleted file mode 100644 index 576e3c3..0000000 --- a/.history/build-table-script/R/table_20240704121703.R +++ /dev/null @@ -1,106 +0,0 @@ - -library(dplyr) -library(purrr) -library(gbifmt) # my library -# setwd("C:/Users/ftw712/Desktop/doc-private-sector-data-publishing/") - -# harvest publishers and datasets private sector publishers directly from machineTags - -# dataset -ds_mt = get_mt("privateSector.gbif.org",type="dataset",limit=500) |> -mutate(link = paste0("https://www.gbif.org/dataset/",uuid)) %>% -mutate(pd = "dataset") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -# publisher -pb_mt = get_mt("privateSector.gbif.org",type="organization",limit=500) |> -mutate(link = paste0("https://www.gbif.org/publisher/",uuid)) %>% -mutate(pd = "publisher") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -ss = list(ds_mt,pb_mt) |> -bind_rows() |> -glimpse() - -# don't need to read from source anymore since we read from machine tags -# ss = readr::read_csv("build-table-script/data/source.tsv") %>% -# mutate(pd = ifelse(grepl("publisher",link),"publisher","dataset")) %>% -# mutate(key = gsub("https://www.gbif.org/publisher/","",link)) %>% -# mutate(key = gsub("https://www.gbif.org/dataset/","",key)) %>% -# glimpse() - -gbif_country = rgbif::enumeration_country() %>% select(Country=title,iso2) %>% glimpse() - -pp = ss %>% -dplyr::filter(pd == "publisher") %>% -select(key,`Activity sector`) |> -mutate(name = map_chr(key,~rgbif::organizations(uuid=.x,limit=1)$data$title)) |> -mutate(`Occurrence records` = map_dbl(key,~ rgbif::occ_search(publishingOrg = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = map_dbl(key,~rgbif::dataset_search(publishingOrg= .x,limit=0)$meta$count)) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(publishingOrg = .x))) %>% -mutate(Company = paste0("https://www.gbif.org/publisher/",key,"[",name,"]")) %>% -mutate(iso2 = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -glimpse() - -dd = ss %>% -dplyr::filter(pd == "dataset") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::dataset_get(uuid=.x)$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ -rgbif::occ_search(datasetKey = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = 1) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(datasetKey = .x))) %>% -mutate(Company = paste0("(https://www.gbif.org/dataset/",key,")[",name,"]")) %>% -mutate(p_key = map_chr(key,~ rgbif::dataset_get(uuid=.x)$publishingOrganizationKey)) %>% -mutate(iso2 = map_chr(p_key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -select(-p_key) %>% -glimpse() -# mutate(country = iso2) %>% - -# combine tables and clean up -tt = rbind(pp,dd) %>% -arrange(name) %>% -select(Company, `Activity sector`, Country, Datasets, `Occurrence records`, `Data citations`) %>% -glimpse() - -# "https://www.gbif.org/publisher/f2429cd1-4d45-475c-852a-892024cb4aba[ARC - Arctic Research and Consulting DA]",{CONSULTING},{NO},1,8 914,63 - -# save as csv -tt %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark="\u202F")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark="\u202F")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark="\u202F")) %>% -mutate(`Activity sector` = paste0("{",`Activity sector`,"}")) %>% -mutate(`Country` = paste0("{",`Country`,"}")) - -# save .adoc -# save_file_table = "250-private-sector-table.adoc" - -# save and more cleanup -# sink(file = save_file_table, type = "output") -# tt %>% -# mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -# mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -# mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -# ascii::ascii(include.rownames = FALSE, digits = 0) -# sink() - -# totals table -# save_file_totals = "260-private-sector-totals.adoc" - -# sink(file = save_file_totals, type = "output") -# tt %>% -# summarise( -# Datasets = sum(Datasets), -# `Occurrence records` = sum(`Occurrence records`), -# `Data citations` = sum(`Data citations`) -# ) %>% -# mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -# mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -# mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -# ascii::ascii(include.rownames = FALSE, digits = 0) -# sink() diff --git a/.history/build-table-script/R/table_20240704121810.R b/.history/build-table-script/R/table_20240704121810.R deleted file mode 100644 index db80bea..0000000 --- a/.history/build-table-script/R/table_20240704121810.R +++ /dev/null @@ -1,106 +0,0 @@ - -library(dplyr) -library(purrr) -library(gbifmt) # my library -# setwd("C:/Users/ftw712/Desktop/doc-private-sector-data-publishing/") - -# harvest publishers and datasets private sector publishers directly from machineTags - -# dataset -ds_mt = get_mt("privateSector.gbif.org",type="dataset",limit=500) |> -mutate(link = paste0("https://www.gbif.org/dataset/",uuid)) %>% -mutate(pd = "dataset") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -# publisher -pb_mt = get_mt("privateSector.gbif.org",type="organization",limit=500) |> -mutate(link = paste0("https://www.gbif.org/publisher/",uuid)) %>% -mutate(pd = "publisher") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -ss = list(ds_mt,pb_mt) |> -bind_rows() |> -glimpse() - -# don't need to read from source anymore since we read from machine tags -# ss = readr::read_csv("build-table-script/data/source.tsv") %>% -# mutate(pd = ifelse(grepl("publisher",link),"publisher","dataset")) %>% -# mutate(key = gsub("https://www.gbif.org/publisher/","",link)) %>% -# mutate(key = gsub("https://www.gbif.org/dataset/","",key)) %>% -# glimpse() - -gbif_country = rgbif::enumeration_country() %>% select(Country=title,iso2) %>% glimpse() - -pp = ss %>% -dplyr::filter(pd == "publisher") %>% -select(key,`Activity sector`) |> -mutate(name = map_chr(key,~rgbif::organizations(uuid=.x,limit=1)$data$title)) |> -mutate(`Occurrence records` = map_dbl(key,~ rgbif::occ_search(publishingOrg = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = map_dbl(key,~rgbif::dataset_search(publishingOrg= .x,limit=0)$meta$count)) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(publishingOrg = .x))) %>% -mutate(Company = paste0("https://www.gbif.org/publisher/",key,"[",name,"]")) %>% -mutate(iso2 = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -glimpse() - -dd = ss %>% -dplyr::filter(pd == "dataset") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::dataset_get(uuid=.x)$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ -rgbif::occ_search(datasetKey = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = 1) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(datasetKey = .x))) %>% -mutate(Company = paste0("(https://www.gbif.org/dataset/",key,")[",name,"]")) %>% -mutate(p_key = map_chr(key,~ rgbif::dataset_get(uuid=.x)$publishingOrganizationKey)) %>% -mutate(iso2 = map_chr(p_key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -select(-p_key) %>% -glimpse() -# mutate(country = iso2) %>% - -# combine tables and clean up -tt = rbind(pp,dd) %>% -arrange(name) %>% -select(Company, `Activity sector`, Country = iso2, Datasets, `Occurrence records`, `Data citations`) %>% -glimpse() - -# "https://www.gbif.org/publisher/f2429cd1-4d45-475c-852a-892024cb4aba[ARC - Arctic Research and Consulting DA]",{CONSULTING},{NO},1,8 914,63 - -# save as csv -tt %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark="\u202F")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark="\u202F")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark="\u202F")) %>% -mutate(`Activity sector` = paste0("{",`Activity sector`,"}")) %>% -mutate(`Country` = paste0("{",`Country`,"}")) - -# save .adoc -# save_file_table = "250-private-sector-table.adoc" - -# save and more cleanup -# sink(file = save_file_table, type = "output") -# tt %>% -# mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -# mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -# mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -# ascii::ascii(include.rownames = FALSE, digits = 0) -# sink() - -# totals table -# save_file_totals = "260-private-sector-totals.adoc" - -# sink(file = save_file_totals, type = "output") -# tt %>% -# summarise( -# Datasets = sum(Datasets), -# `Occurrence records` = sum(`Occurrence records`), -# `Data citations` = sum(`Data citations`) -# ) %>% -# mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -# mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -# mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -# ascii::ascii(include.rownames = FALSE, digits = 0) -# sink() diff --git a/.history/build-table-script/R/table_20240704122005.R b/.history/build-table-script/R/table_20240704122005.R deleted file mode 100644 index bb89c88..0000000 --- a/.history/build-table-script/R/table_20240704122005.R +++ /dev/null @@ -1,108 +0,0 @@ - -library(dplyr) -library(purrr) -library(gbifmt) # my library -# setwd("C:/Users/ftw712/Desktop/doc-private-sector-data-publishing/") - -# harvest publishers and datasets private sector publishers directly from machineTags - -# dataset -ds_mt = get_mt("privateSector.gbif.org",type="dataset",limit=500) |> -mutate(link = paste0("https://www.gbif.org/dataset/",uuid)) %>% -mutate(pd = "dataset") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -# publisher -pb_mt = get_mt("privateSector.gbif.org",type="organization",limit=500) |> -mutate(link = paste0("https://www.gbif.org/publisher/",uuid)) %>% -mutate(pd = "publisher") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -ss = list(ds_mt,pb_mt) |> -bind_rows() |> -glimpse() - -# don't need to read from source anymore since we read from machine tags -# ss = readr::read_csv("build-table-script/data/source.tsv") %>% -# mutate(pd = ifelse(grepl("publisher",link),"publisher","dataset")) %>% -# mutate(key = gsub("https://www.gbif.org/publisher/","",link)) %>% -# mutate(key = gsub("https://www.gbif.org/dataset/","",key)) %>% -# glimpse() - -gbif_country = rgbif::enumeration_country() %>% select(Country=title,iso2) %>% glimpse() - -pp = ss %>% -dplyr::filter(pd == "publisher") %>% -select(key,`Activity sector`) |> -mutate(name = map_chr(key,~rgbif::organizations(uuid=.x,limit=1)$data$title)) |> -mutate(`Occurrence records` = map_dbl(key,~ rgbif::occ_search(publishingOrg = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = map_dbl(key,~rgbif::dataset_search(publishingOrg= .x,limit=0)$meta$count)) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(publishingOrg = .x))) %>% -mutate(Company = paste0("https://www.gbif.org/publisher/",key,"[",name,"]")) %>% -mutate(iso2 = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -glimpse() - -dd = ss %>% -dplyr::filter(pd == "dataset") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::dataset_get(uuid=.x)$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ -rgbif::occ_search(datasetKey = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = 1) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(datasetKey = .x))) %>% -mutate(Company = paste0("(https://www.gbif.org/dataset/",key,")[",name,"]")) %>% -mutate(p_key = map_chr(key,~ rgbif::dataset_get(uuid=.x)$publishingOrganizationKey)) %>% -mutate(iso2 = map_chr(p_key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -select(-p_key) %>% -glimpse() -# mutate(country = iso2) %>% - -# combine tables and clean up -tt = rbind(pp,dd) %>% -arrange(name) %>% -select(Company, `Activity sector`, Country = iso2, Datasets, `Occurrence records`, `Data citations`) %>% -glimpse() %>% -write.csv(file = "your_file.csv", row.names = FALSE, col.names = FALSE) - - -# "https://www.gbif.org/publisher/f2429cd1-4d45-475c-852a-892024cb4aba[ARC - Arctic Research and Consulting DA]",{CONSULTING},{NO},1,8 914,63 - -# save as csv -tt %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark="\u202F")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark="\u202F")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark="\u202F")) %>% -mutate(`Activity sector` = paste0("{",`Activity sector`,"}")) %>% -mutate(`Country` = paste0("{",`Country`,"}")) - -# save .adoc -# save_file_table = "250-private-sector-table.adoc" - -# save and more cleanup -# sink(file = save_file_table, type = "output") -# tt %>% -# mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -# mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -# mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -# ascii::ascii(include.rownames = FALSE, digits = 0) -# sink() - -# totals table -# save_file_totals = "260-private-sector-totals.adoc" - -# sink(file = save_file_totals, type = "output") -# tt %>% -# summarise( -# Datasets = sum(Datasets), -# `Occurrence records` = sum(`Occurrence records`), -# `Data citations` = sum(`Data citations`) -# ) %>% -# mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -# mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -# mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -# ascii::ascii(include.rownames = FALSE, digits = 0) -# sink() diff --git a/.history/build-table-script/R/table_20240704122024.R b/.history/build-table-script/R/table_20240704122024.R deleted file mode 100644 index 15d45d3..0000000 --- a/.history/build-table-script/R/table_20240704122024.R +++ /dev/null @@ -1,108 +0,0 @@ - -library(dplyr) -library(purrr) -library(gbifmt) # my library -# setwd("C:/Users/ftw712/Desktop/doc-private-sector-data-publishing/") - -# harvest publishers and datasets private sector publishers directly from machineTags - -# dataset -ds_mt = get_mt("privateSector.gbif.org",type="dataset",limit=500) |> -mutate(link = paste0("https://www.gbif.org/dataset/",uuid)) %>% -mutate(pd = "dataset") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -# publisher -pb_mt = get_mt("privateSector.gbif.org",type="organization",limit=500) |> -mutate(link = paste0("https://www.gbif.org/publisher/",uuid)) %>% -mutate(pd = "publisher") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -ss = list(ds_mt,pb_mt) |> -bind_rows() |> -glimpse() - -# don't need to read from source anymore since we read from machine tags -# ss = readr::read_csv("build-table-script/data/source.tsv") %>% -# mutate(pd = ifelse(grepl("publisher",link),"publisher","dataset")) %>% -# mutate(key = gsub("https://www.gbif.org/publisher/","",link)) %>% -# mutate(key = gsub("https://www.gbif.org/dataset/","",key)) %>% -# glimpse() - -gbif_country = rgbif::enumeration_country() %>% select(Country=title,iso2) %>% glimpse() - -pp = ss %>% -dplyr::filter(pd == "publisher") %>% -select(key,`Activity sector`) |> -mutate(name = map_chr(key,~rgbif::organizations(uuid=.x,limit=1)$data$title)) |> -mutate(`Occurrence records` = map_dbl(key,~ rgbif::occ_search(publishingOrg = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = map_dbl(key,~rgbif::dataset_search(publishingOrg= .x,limit=0)$meta$count)) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(publishingOrg = .x))) %>% -mutate(Company = paste0("https://www.gbif.org/publisher/",key,"[",name,"]")) %>% -mutate(iso2 = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -glimpse() - -dd = ss %>% -dplyr::filter(pd == "dataset") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::dataset_get(uuid=.x)$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ -rgbif::occ_search(datasetKey = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = 1) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(datasetKey = .x))) %>% -mutate(Company = paste0("(https://www.gbif.org/dataset/",key,")[",name,"]")) %>% -mutate(p_key = map_chr(key,~ rgbif::dataset_get(uuid=.x)$publishingOrganizationKey)) %>% -mutate(iso2 = map_chr(p_key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -select(-p_key) %>% -glimpse() -# mutate(country = iso2) %>% - -# combine tables and clean up -tt = rbind(pp,dd) %>% -arrange(name) %>% -select(Company, `Activity sector`, Country = iso2, Datasets, `Occurrence records`, `Data citations`) %>% -glimpse() %>% -write.csv(file = "250-private-sector-table.csv", row.names = FALSE, col.names = FALSE) - - -# "https://www.gbif.org/publisher/f2429cd1-4d45-475c-852a-892024cb4aba[ARC - Arctic Research and Consulting DA]",{CONSULTING},{NO},1,8 914,63 - -# save as csv -tt %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark="\u202F")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark="\u202F")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark="\u202F")) %>% -mutate(`Activity sector` = paste0("{",`Activity sector`,"}")) %>% -mutate(`Country` = paste0("{",`Country`,"}")) - -# save .adoc -# save_file_table = "250-private-sector-table.adoc" - -# save and more cleanup -# sink(file = save_file_table, type = "output") -# tt %>% -# mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -# mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -# mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -# ascii::ascii(include.rownames = FALSE, digits = 0) -# sink() - -# totals table -# save_file_totals = "260-private-sector-totals.adoc" - -# sink(file = save_file_totals, type = "output") -# tt %>% -# summarise( -# Datasets = sum(Datasets), -# `Occurrence records` = sum(`Occurrence records`), -# `Data citations` = sum(`Data citations`) -# ) %>% -# mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -# mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -# mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -# ascii::ascii(include.rownames = FALSE, digits = 0) -# sink() diff --git a/.history/build-table-script/R/table_20240704122025.R b/.history/build-table-script/R/table_20240704122025.R deleted file mode 100644 index 15d45d3..0000000 --- a/.history/build-table-script/R/table_20240704122025.R +++ /dev/null @@ -1,108 +0,0 @@ - -library(dplyr) -library(purrr) -library(gbifmt) # my library -# setwd("C:/Users/ftw712/Desktop/doc-private-sector-data-publishing/") - -# harvest publishers and datasets private sector publishers directly from machineTags - -# dataset -ds_mt = get_mt("privateSector.gbif.org",type="dataset",limit=500) |> -mutate(link = paste0("https://www.gbif.org/dataset/",uuid)) %>% -mutate(pd = "dataset") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -# publisher -pb_mt = get_mt("privateSector.gbif.org",type="organization",limit=500) |> -mutate(link = paste0("https://www.gbif.org/publisher/",uuid)) %>% -mutate(pd = "publisher") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -ss = list(ds_mt,pb_mt) |> -bind_rows() |> -glimpse() - -# don't need to read from source anymore since we read from machine tags -# ss = readr::read_csv("build-table-script/data/source.tsv") %>% -# mutate(pd = ifelse(grepl("publisher",link),"publisher","dataset")) %>% -# mutate(key = gsub("https://www.gbif.org/publisher/","",link)) %>% -# mutate(key = gsub("https://www.gbif.org/dataset/","",key)) %>% -# glimpse() - -gbif_country = rgbif::enumeration_country() %>% select(Country=title,iso2) %>% glimpse() - -pp = ss %>% -dplyr::filter(pd == "publisher") %>% -select(key,`Activity sector`) |> -mutate(name = map_chr(key,~rgbif::organizations(uuid=.x,limit=1)$data$title)) |> -mutate(`Occurrence records` = map_dbl(key,~ rgbif::occ_search(publishingOrg = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = map_dbl(key,~rgbif::dataset_search(publishingOrg= .x,limit=0)$meta$count)) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(publishingOrg = .x))) %>% -mutate(Company = paste0("https://www.gbif.org/publisher/",key,"[",name,"]")) %>% -mutate(iso2 = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -glimpse() - -dd = ss %>% -dplyr::filter(pd == "dataset") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::dataset_get(uuid=.x)$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ -rgbif::occ_search(datasetKey = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = 1) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(datasetKey = .x))) %>% -mutate(Company = paste0("(https://www.gbif.org/dataset/",key,")[",name,"]")) %>% -mutate(p_key = map_chr(key,~ rgbif::dataset_get(uuid=.x)$publishingOrganizationKey)) %>% -mutate(iso2 = map_chr(p_key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -select(-p_key) %>% -glimpse() -# mutate(country = iso2) %>% - -# combine tables and clean up -tt = rbind(pp,dd) %>% -arrange(name) %>% -select(Company, `Activity sector`, Country = iso2, Datasets, `Occurrence records`, `Data citations`) %>% -glimpse() %>% -write.csv(file = "250-private-sector-table.csv", row.names = FALSE, col.names = FALSE) - - -# "https://www.gbif.org/publisher/f2429cd1-4d45-475c-852a-892024cb4aba[ARC - Arctic Research and Consulting DA]",{CONSULTING},{NO},1,8 914,63 - -# save as csv -tt %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark="\u202F")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark="\u202F")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark="\u202F")) %>% -mutate(`Activity sector` = paste0("{",`Activity sector`,"}")) %>% -mutate(`Country` = paste0("{",`Country`,"}")) - -# save .adoc -# save_file_table = "250-private-sector-table.adoc" - -# save and more cleanup -# sink(file = save_file_table, type = "output") -# tt %>% -# mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -# mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -# mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -# ascii::ascii(include.rownames = FALSE, digits = 0) -# sink() - -# totals table -# save_file_totals = "260-private-sector-totals.adoc" - -# sink(file = save_file_totals, type = "output") -# tt %>% -# summarise( -# Datasets = sum(Datasets), -# `Occurrence records` = sum(`Occurrence records`), -# `Data citations` = sum(`Data citations`) -# ) %>% -# mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -# mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -# mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -# ascii::ascii(include.rownames = FALSE, digits = 0) -# sink() diff --git a/.history/build-table-script/R/table_20240704122031.R b/.history/build-table-script/R/table_20240704122031.R deleted file mode 100644 index 17b183b..0000000 --- a/.history/build-table-script/R/table_20240704122031.R +++ /dev/null @@ -1,108 +0,0 @@ - -library(dplyr) -library(purrr) -library(gbifmt) # my library -# setwd("C:/Users/ftw712/Desktop/doc-private-sector-data-publishing/") - -# harvest publishers and datasets private sector publishers directly from machineTags - -# dataset -ds_mt = get_mt("privateSector.gbif.org",type="dataset",limit=500) |> -mutate(link = paste0("https://www.gbif.org/dataset/",uuid)) %>% -mutate(pd = "dataset") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -# publisher -pb_mt = get_mt("privateSector.gbif.org",type="organization",limit=500) |> -mutate(link = paste0("https://www.gbif.org/publisher/",uuid)) %>% -mutate(pd = "publisher") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -ss = list(ds_mt,pb_mt) |> -bind_rows() |> -glimpse() - -# don't need to read from source anymore since we read from machine tags -# ss = readr::read_csv("build-table-script/data/source.tsv") %>% -# mutate(pd = ifelse(grepl("publisher",link),"publisher","dataset")) %>% -# mutate(key = gsub("https://www.gbif.org/publisher/","",link)) %>% -# mutate(key = gsub("https://www.gbif.org/dataset/","",key)) %>% -# glimpse() - -gbif_country = rgbif::enumeration_country() %>% select(Country=title,iso2) %>% glimpse() - -pp = ss %>% -dplyr::filter(pd == "publisher") %>% -select(key,`Activity sector`) |> -mutate(name = map_chr(key,~rgbif::organizations(uuid=.x,limit=1)$data$title)) |> -mutate(`Occurrence records` = map_dbl(key,~ rgbif::occ_search(publishingOrg = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = map_dbl(key,~rgbif::dataset_search(publishingOrg= .x,limit=0)$meta$count)) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(publishingOrg = .x))) %>% -mutate(Company = paste0("https://www.gbif.org/publisher/",key,"[",name,"]")) %>% -mutate(iso2 = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -glimpse() - -dd = ss %>% -dplyr::filter(pd == "dataset") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::dataset_get(uuid=.x)$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ -rgbif::occ_search(datasetKey = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = 1) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(datasetKey = .x))) %>% -mutate(Company = paste0("(https://www.gbif.org/dataset/",key,")[",name,"]")) %>% -mutate(p_key = map_chr(key,~ rgbif::dataset_get(uuid=.x)$publishingOrganizationKey)) %>% -mutate(iso2 = map_chr(p_key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -select(-p_key) %>% -glimpse() -# mutate(country = iso2) %>% - -# combine tables and clean up -tt = rbind(pp,dd) %>% -arrange(name) %>% -select(Company, `Activity sector`, Country = iso2, Datasets, `Occurrence records`, `Data citations`) %>% -glimpse() %>% -write.csv(file = "250-private-sector-table-test.csv", row.names = FALSE, col.names = FALSE) - - -# "https://www.gbif.org/publisher/f2429cd1-4d45-475c-852a-892024cb4aba[ARC - Arctic Research and Consulting DA]",{CONSULTING},{NO},1,8 914,63 - -# save as csv -tt %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark="\u202F")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark="\u202F")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark="\u202F")) %>% -mutate(`Activity sector` = paste0("{",`Activity sector`,"}")) %>% -mutate(`Country` = paste0("{",`Country`,"}")) - -# save .adoc -# save_file_table = "250-private-sector-table.adoc" - -# save and more cleanup -# sink(file = save_file_table, type = "output") -# tt %>% -# mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -# mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -# mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -# ascii::ascii(include.rownames = FALSE, digits = 0) -# sink() - -# totals table -# save_file_totals = "260-private-sector-totals.adoc" - -# sink(file = save_file_totals, type = "output") -# tt %>% -# summarise( -# Datasets = sum(Datasets), -# `Occurrence records` = sum(`Occurrence records`), -# `Data citations` = sum(`Data citations`) -# ) %>% -# mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -# mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -# mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -# ascii::ascii(include.rownames = FALSE, digits = 0) -# sink() diff --git a/.history/build-table-script/R/table_20240704122102.R b/.history/build-table-script/R/table_20240704122102.R deleted file mode 100644 index cdb518b..0000000 --- a/.history/build-table-script/R/table_20240704122102.R +++ /dev/null @@ -1,107 +0,0 @@ - -library(dplyr) -library(purrr) -library(gbifmt) # my library -# setwd("C:/Users/ftw712/Desktop/doc-private-sector-data-publishing/") - -# harvest publishers and datasets private sector publishers directly from machineTags - -# dataset -ds_mt = get_mt("privateSector.gbif.org",type="dataset",limit=500) |> -mutate(link = paste0("https://www.gbif.org/dataset/",uuid)) %>% -mutate(pd = "dataset") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -# publisher -pb_mt = get_mt("privateSector.gbif.org",type="organization",limit=500) |> -mutate(link = paste0("https://www.gbif.org/publisher/",uuid)) %>% -mutate(pd = "publisher") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -ss = list(ds_mt,pb_mt) |> -bind_rows() |> -glimpse() - -# don't need to read from source anymore since we read from machine tags -# ss = readr::read_csv("build-table-script/data/source.tsv") %>% -# mutate(pd = ifelse(grepl("publisher",link),"publisher","dataset")) %>% -# mutate(key = gsub("https://www.gbif.org/publisher/","",link)) %>% -# mutate(key = gsub("https://www.gbif.org/dataset/","",key)) %>% -# glimpse() - -gbif_country = rgbif::enumeration_country() %>% select(Country=title,iso2) %>% glimpse() - -pp = ss %>% -dplyr::filter(pd == "publisher") %>% -select(key,`Activity sector`) |> -mutate(name = map_chr(key,~rgbif::organizations(uuid=.x,limit=1)$data$title)) |> -mutate(`Occurrence records` = map_dbl(key,~ rgbif::occ_search(publishingOrg = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = map_dbl(key,~rgbif::dataset_search(publishingOrg= .x,limit=0)$meta$count)) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(publishingOrg = .x))) %>% -mutate(Company = paste0("https://www.gbif.org/publisher/",key,"[",name,"]")) %>% -mutate(iso2 = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -glimpse() - -dd = ss %>% -dplyr::filter(pd == "dataset") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::dataset_get(uuid=.x)$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ -rgbif::occ_search(datasetKey = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = 1) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(datasetKey = .x))) %>% -mutate(Company = paste0("(https://www.gbif.org/dataset/",key,")[",name,"]")) %>% -mutate(p_key = map_chr(key,~ rgbif::dataset_get(uuid=.x)$publishingOrganizationKey)) %>% -mutate(iso2 = map_chr(p_key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -select(-p_key) %>% -glimpse() - -# combine tables and clean up -tt = rbind(pp,dd) %>% -arrange(name) %>% -select(Company, `Activity sector`, Country = iso2, Datasets, `Occurrence records`, `Data citations`) %>% -glimpse() %>% -write.csv(file = "250-private-sector-table-test.csv", row.names = FALSE, col.names = FALSE) - - -# "https://www.gbif.org/publisher/f2429cd1-4d45-475c-852a-892024cb4aba[ARC - Arctic Research and Consulting DA]",{CONSULTING},{NO},1,8 914,63 - -# save as csv -tt %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark="\u202F")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark="\u202F")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark="\u202F")) %>% -mutate(`Activity sector` = paste0("{",`Activity sector`,"}")) %>% -mutate(`Country` = paste0("{",`Country`,"}")) - -# save .adoc -# save_file_table = "250-private-sector-table.adoc" - -# save and more cleanup -# sink(file = save_file_table, type = "output") -# tt %>% -# mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -# mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -# mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -# ascii::ascii(include.rownames = FALSE, digits = 0) -# sink() - -# totals table -# save_file_totals = "260-private-sector-totals.adoc" - -# sink(file = save_file_totals, type = "output") -# tt %>% -# summarise( -# Datasets = sum(Datasets), -# `Occurrence records` = sum(`Occurrence records`), -# `Data citations` = sum(`Data citations`) -# ) %>% -# mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -# mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -# mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -# ascii::ascii(include.rownames = FALSE, digits = 0) -# sink() diff --git a/.history/build-table-script/R/table_20240704122152.R b/.history/build-table-script/R/table_20240704122152.R deleted file mode 100644 index 8076f7d..0000000 --- a/.history/build-table-script/R/table_20240704122152.R +++ /dev/null @@ -1,106 +0,0 @@ - -library(dplyr) -library(purrr) -library(gbifmt) # my library -# setwd("C:/Users/ftw712/Desktop/doc-private-sector-data-publishing/") - -# harvest publishers and datasets private sector publishers directly from machineTags - -# dataset -ds_mt = get_mt("privateSector.gbif.org",type="dataset",limit=500) |> -mutate(link = paste0("https://www.gbif.org/dataset/",uuid)) %>% -mutate(pd = "dataset") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -# publisher -pb_mt = get_mt("privateSector.gbif.org",type="organization",limit=500) |> -mutate(link = paste0("https://www.gbif.org/publisher/",uuid)) %>% -mutate(pd = "publisher") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -ss = list(ds_mt,pb_mt) |> -bind_rows() |> -glimpse() - -# don't need to read from source anymore since we read from machine tags -# ss = readr::read_csv("build-table-script/data/source.tsv") %>% -# mutate(pd = ifelse(grepl("publisher",link),"publisher","dataset")) %>% -# mutate(key = gsub("https://www.gbif.org/publisher/","",link)) %>% -# mutate(key = gsub("https://www.gbif.org/dataset/","",key)) %>% -# glimpse() - -gbif_country = rgbif::enumeration_country() %>% select(Country=title,iso2) %>% glimpse() - -pp = ss %>% -dplyr::filter(pd == "publisher") %>% -select(key,`Activity sector`) |> -mutate(name = map_chr(key,~rgbif::organizations(uuid=.x,limit=1)$data$title)) |> -mutate(`Occurrence records` = map_dbl(key,~ rgbif::occ_search(publishingOrg = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = map_dbl(key,~rgbif::dataset_search(publishingOrg= .x,limit=0)$meta$count)) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(publishingOrg = .x))) %>% -mutate(Company = paste0("https://www.gbif.org/publisher/",key,"[",name,"]")) %>% -mutate(iso2 = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -glimpse() - -dd = ss %>% -dplyr::filter(pd == "dataset") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::dataset_get(uuid=.x)$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ -rgbif::occ_search(datasetKey = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = 1) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(datasetKey = .x))) %>% -mutate(Company = paste0("(https://www.gbif.org/dataset/",key,")[",name,"]")) %>% -mutate(p_key = map_chr(key,~ rgbif::dataset_get(uuid=.x)$publishingOrganizationKey)) %>% -mutate(iso2 = map_chr(p_key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -select(-p_key) %>% -glimpse() - -# combine tables and clean up -tt = rbind(pp,dd) %>% -arrange(name) %>% -select(Company, `Activity sector`, Country = iso2, Datasets, `Occurrence records`, `Data citations`) %>% - - -# "https://www.gbif.org/publisher/f2429cd1-4d45-475c-852a-892024cb4aba[ARC - Arctic Research and Consulting DA]",{CONSULTING},{NO},1,8 914,63 - -# save as csv -tt %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark="\u202F")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark="\u202F")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark="\u202F")) %>% -mutate(`Activity sector` = paste0("{",`Activity sector`,"}")) %>% -mutate(`Country` = paste0("{",`Country`,"}")) -write.csv(file = "250-private-sector-table-test.csv", row.names = FALSE, col.names = FALSE) - -# save .adoc -# save_file_table = "250-private-sector-table.adoc" - -# save and more cleanup -# sink(file = save_file_table, type = "output") -# tt %>% -# mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -# mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -# mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -# ascii::ascii(include.rownames = FALSE, digits = 0) -# sink() - -# totals table -# save_file_totals = "260-private-sector-totals.adoc" - -# sink(file = save_file_totals, type = "output") -# tt %>% -# summarise( -# Datasets = sum(Datasets), -# `Occurrence records` = sum(`Occurrence records`), -# `Data citations` = sum(`Data citations`) -# ) %>% -# mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -# mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -# mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -# ascii::ascii(include.rownames = FALSE, digits = 0) -# sink() diff --git a/.history/build-table-script/R/table_20240704122154.R b/.history/build-table-script/R/table_20240704122154.R deleted file mode 100644 index 43dc555..0000000 --- a/.history/build-table-script/R/table_20240704122154.R +++ /dev/null @@ -1,106 +0,0 @@ - -library(dplyr) -library(purrr) -library(gbifmt) # my library -# setwd("C:/Users/ftw712/Desktop/doc-private-sector-data-publishing/") - -# harvest publishers and datasets private sector publishers directly from machineTags - -# dataset -ds_mt = get_mt("privateSector.gbif.org",type="dataset",limit=500) |> -mutate(link = paste0("https://www.gbif.org/dataset/",uuid)) %>% -mutate(pd = "dataset") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -# publisher -pb_mt = get_mt("privateSector.gbif.org",type="organization",limit=500) |> -mutate(link = paste0("https://www.gbif.org/publisher/",uuid)) %>% -mutate(pd = "publisher") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -ss = list(ds_mt,pb_mt) |> -bind_rows() |> -glimpse() - -# don't need to read from source anymore since we read from machine tags -# ss = readr::read_csv("build-table-script/data/source.tsv") %>% -# mutate(pd = ifelse(grepl("publisher",link),"publisher","dataset")) %>% -# mutate(key = gsub("https://www.gbif.org/publisher/","",link)) %>% -# mutate(key = gsub("https://www.gbif.org/dataset/","",key)) %>% -# glimpse() - -gbif_country = rgbif::enumeration_country() %>% select(Country=title,iso2) %>% glimpse() - -pp = ss %>% -dplyr::filter(pd == "publisher") %>% -select(key,`Activity sector`) |> -mutate(name = map_chr(key,~rgbif::organizations(uuid=.x,limit=1)$data$title)) |> -mutate(`Occurrence records` = map_dbl(key,~ rgbif::occ_search(publishingOrg = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = map_dbl(key,~rgbif::dataset_search(publishingOrg= .x,limit=0)$meta$count)) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(publishingOrg = .x))) %>% -mutate(Company = paste0("https://www.gbif.org/publisher/",key,"[",name,"]")) %>% -mutate(iso2 = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -glimpse() - -dd = ss %>% -dplyr::filter(pd == "dataset") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::dataset_get(uuid=.x)$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ -rgbif::occ_search(datasetKey = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = 1) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(datasetKey = .x))) %>% -mutate(Company = paste0("(https://www.gbif.org/dataset/",key,")[",name,"]")) %>% -mutate(p_key = map_chr(key,~ rgbif::dataset_get(uuid=.x)$publishingOrganizationKey)) %>% -mutate(iso2 = map_chr(p_key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -select(-p_key) %>% -glimpse() - -# combine tables and clean up -tt = rbind(pp,dd) %>% -arrange(name) %>% -select(Company, `Activity sector`, Country = iso2, Datasets, `Occurrence records`, `Data citations`) - - -# "https://www.gbif.org/publisher/f2429cd1-4d45-475c-852a-892024cb4aba[ARC - Arctic Research and Consulting DA]",{CONSULTING},{NO},1,8 914,63 - -# save as csv -tt %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark="\u202F")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark="\u202F")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark="\u202F")) %>% -mutate(`Activity sector` = paste0("{",`Activity sector`,"}")) %>% -mutate(`Country` = paste0("{",`Country`,"}")) -write.csv(file = "250-private-sector-table-test.csv", row.names = FALSE, col.names = FALSE) - -# save .adoc -# save_file_table = "250-private-sector-table.adoc" - -# save and more cleanup -# sink(file = save_file_table, type = "output") -# tt %>% -# mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -# mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -# mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -# ascii::ascii(include.rownames = FALSE, digits = 0) -# sink() - -# totals table -# save_file_totals = "260-private-sector-totals.adoc" - -# sink(file = save_file_totals, type = "output") -# tt %>% -# summarise( -# Datasets = sum(Datasets), -# `Occurrence records` = sum(`Occurrence records`), -# `Data citations` = sum(`Data citations`) -# ) %>% -# mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -# mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -# mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -# ascii::ascii(include.rownames = FALSE, digits = 0) -# sink() diff --git a/.history/build-table-script/R/table_20240704122352.R b/.history/build-table-script/R/table_20240704122352.R deleted file mode 100644 index 933e570..0000000 --- a/.history/build-table-script/R/table_20240704122352.R +++ /dev/null @@ -1,106 +0,0 @@ - -library(dplyr) -library(purrr) -library(gbifmt) # my library -# setwd("C:/Users/ftw712/Desktop/doc-private-sector-data-publishing/") - -# harvest publishers and datasets private sector publishers directly from machineTags - -# dataset -ds_mt = get_mt("privateSector.gbif.org",type="dataset",limit=500) |> -mutate(link = paste0("https://www.gbif.org/dataset/",uuid)) %>% -mutate(pd = "dataset") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -# publisher -pb_mt = get_mt("privateSector.gbif.org",type="organization",limit=500) |> -mutate(link = paste0("https://www.gbif.org/publisher/",uuid)) %>% -mutate(pd = "publisher") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -ss = list(ds_mt,pb_mt) |> -bind_rows() |> -glimpse() - -# don't need to read from source anymore since we read from machine tags -# ss = readr::read_csv("build-table-script/data/source.tsv") %>% -# mutate(pd = ifelse(grepl("publisher",link),"publisher","dataset")) %>% -# mutate(key = gsub("https://www.gbif.org/publisher/","",link)) %>% -# mutate(key = gsub("https://www.gbif.org/dataset/","",key)) %>% -# glimpse() - -gbif_country = rgbif::enumeration_country() %>% select(Country=title,iso2) %>% glimpse() - -pp = ss %>% -dplyr::filter(pd == "publisher") %>% -select(key,`Activity sector`) |> -mutate(name = map_chr(key,~rgbif::organizations(uuid=.x,limit=1)$data$title)) |> -mutate(`Occurrence records` = map_dbl(key,~ rgbif::occ_search(publishingOrg = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = map_dbl(key,~rgbif::dataset_search(publishingOrg= .x,limit=0)$meta$count)) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(publishingOrg = .x))) %>% -mutate(Company = paste0("https://www.gbif.org/publisher/",key,"[",name,"]")) %>% -mutate(iso2 = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -glimpse() - -dd = ss %>% -dplyr::filter(pd == "dataset") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::dataset_get(uuid=.x)$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ -rgbif::occ_search(datasetKey = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = 1) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(datasetKey = .x))) %>% -mutate(Company = paste0("(https://www.gbif.org/dataset/",key,")[",name,"]")) %>% -mutate(p_key = map_chr(key,~ rgbif::dataset_get(uuid=.x)$publishingOrganizationKey)) %>% -mutate(iso2 = map_chr(p_key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -select(-p_key) %>% -glimpse() - -# combine tables and clean up -tt = rbind(pp,dd) %>% -arrange(name) %>% -select(Company, `Activity sector`, Country = iso2, Datasets, `Occurrence records`, `Data citations`) - - -# "https://www.gbif.org/publisher/f2429cd1-4d45-475c-852a-892024cb4aba[ARC - Arctic Research and Consulting DA]",{CONSULTING},{NO},1,8 914,63 - -# save as csv -tt %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark="\u202F")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark="\u202F")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark="\u202F")) %>% -mutate(`Activity sector` = paste0("{",`Activity sector`,"}")) %>% -mutate(`Country` = paste0("{",`Country`,"}")) %>% -write.csv(file = "250-private-sector-table-test.csv", row.names = FALSE, col.names = FALSE) - -# save .adoc -# save_file_table = "250-private-sector-table.adoc" - -# save and more cleanup -# sink(file = save_file_table, type = "output") -# tt %>% -# mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -# mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -# mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -# ascii::ascii(include.rownames = FALSE, digits = 0) -# sink() - -# totals table -# save_file_totals = "260-private-sector-totals.adoc" - -# sink(file = save_file_totals, type = "output") -# tt %>% -# summarise( -# Datasets = sum(Datasets), -# `Occurrence records` = sum(`Occurrence records`), -# `Data citations` = sum(`Data citations`) -# ) %>% -# mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -# mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -# mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -# ascii::ascii(include.rownames = FALSE, digits = 0) -# sink() diff --git a/.history/build-table-script/R/table_20240704122524.R b/.history/build-table-script/R/table_20240704122524.R deleted file mode 100644 index a526025..0000000 --- a/.history/build-table-script/R/table_20240704122524.R +++ /dev/null @@ -1,105 +0,0 @@ - -library(dplyr) -library(purrr) -library(gbifmt) # my library -# setwd("C:/Users/ftw712/Desktop/doc-private-sector-data-publishing/") - -# harvest publishers and datasets private sector publishers directly from machineTags - -# dataset -ds_mt = get_mt("privateSector.gbif.org",type="dataset",limit=500) |> -mutate(link = paste0("https://www.gbif.org/dataset/",uuid)) %>% -mutate(pd = "dataset") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -# publisher -pb_mt = get_mt("privateSector.gbif.org",type="organization",limit=500) |> -mutate(link = paste0("https://www.gbif.org/publisher/",uuid)) %>% -mutate(pd = "publisher") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -ss = list(ds_mt,pb_mt) |> -bind_rows() |> -glimpse() - -# don't need to read from source anymore since we read from machine tags -# ss = readr::read_csv("build-table-script/data/source.tsv") %>% -# mutate(pd = ifelse(grepl("publisher",link),"publisher","dataset")) %>% -# mutate(key = gsub("https://www.gbif.org/publisher/","",link)) %>% -# mutate(key = gsub("https://www.gbif.org/dataset/","",key)) %>% -# glimpse() - -gbif_country = rgbif::enumeration_country() %>% select(Country=title,iso2) %>% glimpse() - -pp = ss %>% -dplyr::filter(pd == "publisher") %>% -select(key,`Activity sector`) |> -mutate(name = map_chr(key,~rgbif::organizations(uuid=.x,limit=1)$data$title)) |> -mutate(`Occurrence records` = map_dbl(key,~ rgbif::occ_search(publishingOrg = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = map_dbl(key,~rgbif::dataset_search(publishingOrg= .x,limit=0)$meta$count)) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(publishingOrg = .x))) %>% -mutate(Company = paste0("https://www.gbif.org/publisher/",key,"[",name,"]")) %>% -mutate(iso2 = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -glimpse() - -dd = ss %>% -dplyr::filter(pd == "dataset") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::dataset_get(uuid=.x)$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ -rgbif::occ_search(datasetKey = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = 1) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(datasetKey = .x))) %>% -mutate(Company = paste0("(https://www.gbif.org/dataset/",key,")[",name,"]")) %>% -mutate(p_key = map_chr(key,~ rgbif::dataset_get(uuid=.x)$publishingOrganizationKey)) %>% -mutate(iso2 = map_chr(p_key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -select(-p_key) %>% -glimpse() - -# combine tables and clean up -tt = rbind(pp,dd) %>% -arrange(name) %>% -select(Company, `Activity sector`, Country = iso2, Datasets, `Occurrence records`, `Data citations`) - - -# "https://www.gbif.org/publisher/f2429cd1-4d45-475c-852a-892024cb4aba[ARC - Arctic Research and Consulting DA]",{CONSULTING},{NO},1,8 914,63 - -# save as csv -tt %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark="\u202F")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark="\u202F")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark="\u202F")) %>% -mutate(`Activity sector` = paste0("{",`Activity sector`,"}")) %>% -mutate(`Country` = paste0("{",`Country`,"}")) %>% -write.table(df, file = "250-private-sector-table-test.csv", row.names = FALSE, col.names = FALSE, sep = ",", quote = FALSE) -# save .adoc -# save_file_table = "250-private-sector-table.adoc" - -# save and more cleanup -# sink(file = save_file_table, type = "output") -# tt %>% -# mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -# mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -# mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -# ascii::ascii(include.rownames = FALSE, digits = 0) -# sink() - -# totals table -# save_file_totals = "260-private-sector-totals.adoc" - -# sink(file = save_file_totals, type = "output") -# tt %>% -# summarise( -# Datasets = sum(Datasets), -# `Occurrence records` = sum(`Occurrence records`), -# `Data citations` = sum(`Data citations`) -# ) %>% -# mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -# mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -# mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -# ascii::ascii(include.rownames = FALSE, digits = 0) -# sink() diff --git a/.history/build-table-script/R/table_20240704122533.R b/.history/build-table-script/R/table_20240704122533.R deleted file mode 100644 index 334bbfd..0000000 --- a/.history/build-table-script/R/table_20240704122533.R +++ /dev/null @@ -1,106 +0,0 @@ - -library(dplyr) -library(purrr) -library(gbifmt) # my library -# setwd("C:/Users/ftw712/Desktop/doc-private-sector-data-publishing/") - -# harvest publishers and datasets private sector publishers directly from machineTags - -# dataset -ds_mt = get_mt("privateSector.gbif.org",type="dataset",limit=500) |> -mutate(link = paste0("https://www.gbif.org/dataset/",uuid)) %>% -mutate(pd = "dataset") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -# publisher -pb_mt = get_mt("privateSector.gbif.org",type="organization",limit=500) |> -mutate(link = paste0("https://www.gbif.org/publisher/",uuid)) %>% -mutate(pd = "publisher") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -ss = list(ds_mt,pb_mt) |> -bind_rows() |> -glimpse() - -# don't need to read from source anymore since we read from machine tags -# ss = readr::read_csv("build-table-script/data/source.tsv") %>% -# mutate(pd = ifelse(grepl("publisher",link),"publisher","dataset")) %>% -# mutate(key = gsub("https://www.gbif.org/publisher/","",link)) %>% -# mutate(key = gsub("https://www.gbif.org/dataset/","",key)) %>% -# glimpse() - -gbif_country = rgbif::enumeration_country() %>% select(Country=title,iso2) %>% glimpse() - -pp = ss %>% -dplyr::filter(pd == "publisher") %>% -select(key,`Activity sector`) |> -mutate(name = map_chr(key,~rgbif::organizations(uuid=.x,limit=1)$data$title)) |> -mutate(`Occurrence records` = map_dbl(key,~ rgbif::occ_search(publishingOrg = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = map_dbl(key,~rgbif::dataset_search(publishingOrg= .x,limit=0)$meta$count)) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(publishingOrg = .x))) %>% -mutate(Company = paste0("https://www.gbif.org/publisher/",key,"[",name,"]")) %>% -mutate(iso2 = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -glimpse() - -dd = ss %>% -dplyr::filter(pd == "dataset") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::dataset_get(uuid=.x)$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ -rgbif::occ_search(datasetKey = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = 1) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(datasetKey = .x))) %>% -mutate(Company = paste0("(https://www.gbif.org/dataset/",key,")[",name,"]")) %>% -mutate(p_key = map_chr(key,~ rgbif::dataset_get(uuid=.x)$publishingOrganizationKey)) %>% -mutate(iso2 = map_chr(p_key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -select(-p_key) %>% -glimpse() - -# combine tables and clean up -tt = rbind(pp,dd) %>% -arrange(name) %>% -select(Company, `Activity sector`, Country = iso2, Datasets, `Occurrence records`, `Data citations`) - - -# "https://www.gbif.org/publisher/f2429cd1-4d45-475c-852a-892024cb4aba[ARC - Arctic Research and Consulting DA]",{CONSULTING},{NO},1,8 914,63 - -# save as csv -tt %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark="\u202F")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark="\u202F")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark="\u202F")) %>% -mutate(`Activity sector` = paste0("{",`Activity sector`,"}")) %>% -mutate(`Country` = paste0("{",`Country`,"}")) %>% -write.table(df, file = "250-private-sector-table-test.csv", row.names = FALSE, col.names = FALSE, sep = ",", quote = FALSE) - -# save .adoc -# save_file_table = "250-private-sector-table.adoc" - -# save and more cleanup -# sink(file = save_file_table, type = "output") -# tt %>% -# mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -# mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -# mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -# ascii::ascii(include.rownames = FALSE, digits = 0) -# sink() - -# totals table -# save_file_totals = "260-private-sector-totals.adoc" - -# sink(file = save_file_totals, type = "output") -# tt %>% -# summarise( -# Datasets = sum(Datasets), -# `Occurrence records` = sum(`Occurrence records`), -# `Data citations` = sum(`Data citations`) -# ) %>% -# mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -# mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -# mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -# ascii::ascii(include.rownames = FALSE, digits = 0) -# sink() diff --git a/.history/build-table-script/R/table_20240704122610.R b/.history/build-table-script/R/table_20240704122610.R deleted file mode 100644 index a1d80fc..0000000 --- a/.history/build-table-script/R/table_20240704122610.R +++ /dev/null @@ -1,106 +0,0 @@ - -library(dplyr) -library(purrr) -library(gbifmt) # my library -# setwd("C:/Users/ftw712/Desktop/doc-private-sector-data-publishing/") - -# harvest publishers and datasets private sector publishers directly from machineTags - -# dataset -ds_mt = get_mt("privateSector.gbif.org",type="dataset",limit=500) |> -mutate(link = paste0("https://www.gbif.org/dataset/",uuid)) %>% -mutate(pd = "dataset") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -# publisher -pb_mt = get_mt("privateSector.gbif.org",type="organization",limit=500) |> -mutate(link = paste0("https://www.gbif.org/publisher/",uuid)) %>% -mutate(pd = "publisher") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -ss = list(ds_mt,pb_mt) |> -bind_rows() |> -glimpse() - -# don't need to read from source anymore since we read from machine tags -# ss = readr::read_csv("build-table-script/data/source.tsv") %>% -# mutate(pd = ifelse(grepl("publisher",link),"publisher","dataset")) %>% -# mutate(key = gsub("https://www.gbif.org/publisher/","",link)) %>% -# mutate(key = gsub("https://www.gbif.org/dataset/","",key)) %>% -# glimpse() - -gbif_country = rgbif::enumeration_country() %>% select(Country=title,iso2) %>% glimpse() - -pp = ss %>% -dplyr::filter(pd == "publisher") %>% -select(key,`Activity sector`) |> -mutate(name = map_chr(key,~rgbif::organizations(uuid=.x,limit=1)$data$title)) |> -mutate(`Occurrence records` = map_dbl(key,~ rgbif::occ_search(publishingOrg = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = map_dbl(key,~rgbif::dataset_search(publishingOrg= .x,limit=0)$meta$count)) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(publishingOrg = .x))) %>% -mutate(Company = paste0("https://www.gbif.org/publisher/",key,"[",name,"]")) %>% -mutate(iso2 = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -glimpse() - -dd = ss %>% -dplyr::filter(pd == "dataset") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::dataset_get(uuid=.x)$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ -rgbif::occ_search(datasetKey = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = 1) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(datasetKey = .x))) %>% -mutate(Company = paste0("(https://www.gbif.org/dataset/",key,")[",name,"]")) %>% -mutate(p_key = map_chr(key,~ rgbif::dataset_get(uuid=.x)$publishingOrganizationKey)) %>% -mutate(iso2 = map_chr(p_key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -select(-p_key) %>% -glimpse() - -# combine tables and clean up -tt = rbind(pp,dd) %>% -arrange(name) %>% -select(Company, `Activity sector`, Country = iso2, Datasets, `Occurrence records`, `Data citations`) - - -# "https://www.gbif.org/publisher/f2429cd1-4d45-475c-852a-892024cb4aba[ARC - Arctic Research and Consulting DA]",{CONSULTING},{NO},1,8 914,63 - -# save as csv -tt %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark="\u202F")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark="\u202F")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark="\u202F")) %>% -mutate(`Activity sector` = paste0("{",`Activity sector`,"}")) %>% -mutate(`Country` = paste0("{",`Country`,"}")) %>% -write.table(file = "250-private-sector-table-test.csv", row.names = FALSE, col.names = FALSE, sep = ",", quote = FALSE) - -# save .adoc -# save_file_table = "250-private-sector-table.adoc" - -# save and more cleanup -# sink(file = save_file_table, type = "output") -# tt %>% -# mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -# mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -# mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -# ascii::ascii(include.rownames = FALSE, digits = 0) -# sink() - -# totals table -# save_file_totals = "260-private-sector-totals.adoc" - -# sink(file = save_file_totals, type = "output") -# tt %>% -# summarise( -# Datasets = sum(Datasets), -# `Occurrence records` = sum(`Occurrence records`), -# `Data citations` = sum(`Data citations`) -# ) %>% -# mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -# mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -# mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -# ascii::ascii(include.rownames = FALSE, digits = 0) -# sink() diff --git a/.history/build-table-script/R/table_20240704122910.R b/.history/build-table-script/R/table_20240704122910.R deleted file mode 100644 index 3f298da..0000000 --- a/.history/build-table-script/R/table_20240704122910.R +++ /dev/null @@ -1,106 +0,0 @@ - -library(dplyr) -library(purrr) -library(gbifmt) # my library -# setwd("C:/Users/ftw712/Desktop/doc-private-sector-data-publishing/") - -# harvest publishers and datasets private sector publishers directly from machineTags - -# dataset -ds_mt = get_mt("privateSector.gbif.org",type="dataset",limit=500) |> -mutate(link = paste0("https://www.gbif.org/dataset/",uuid)) %>% -mutate(pd = "dataset") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -# publisher -pb_mt = get_mt("privateSector.gbif.org",type="organization",limit=500) |> -mutate(link = paste0("https://www.gbif.org/publisher/",uuid)) %>% -mutate(pd = "publisher") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -ss = list(ds_mt,pb_mt) |> -bind_rows() |> -glimpse() - -# don't need to read from source anymore since we read from machine tags -# ss = readr::read_csv("build-table-script/data/source.tsv") %>% -# mutate(pd = ifelse(grepl("publisher",link),"publisher","dataset")) %>% -# mutate(key = gsub("https://www.gbif.org/publisher/","",link)) %>% -# mutate(key = gsub("https://www.gbif.org/dataset/","",key)) %>% -# glimpse() - -gbif_country = rgbif::enumeration_country() %>% select(Country=title,iso2) %>% glimpse() - -pp = ss %>% -dplyr::filter(pd == "publisher") %>% -select(key,`Activity sector`) |> -mutate(name = map_chr(key,~rgbif::organizations(uuid=.x,limit=1)$data$title)) |> -mutate(`Occurrence records` = map_dbl(key,~ rgbif::occ_search(publishingOrg = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = map_dbl(key,~rgbif::dataset_search(publishingOrg= .x,limit=0)$meta$count)) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(publishingOrg = .x))) %>% -mutate(Company = paste0("https://www.gbif.org/publisher/",key,"[",name,"]")) %>% -mutate(iso2 = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -glimpse() - -dd = ss %>% -dplyr::filter(pd == "dataset") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::dataset_get(uuid=.x)$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ -rgbif::occ_search(datasetKey = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = 1) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(datasetKey = .x))) %>% -mutate(Company = paste0("(https://www.gbif.org/dataset/",key,")[",name,"]")) %>% -mutate(p_key = map_chr(key,~ rgbif::dataset_get(uuid=.x)$publishingOrganizationKey)) %>% -mutate(iso2 = map_chr(p_key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -select(-p_key) %>% -glimpse() - -# combine tables and clean up -tt = rbind(pp,dd) %>% -arrange(name) %>% -select(Company, `Activity sector`, Country = iso2, Datasets, `Occurrence records`, `Data citations`) - - -# "https://www.gbif.org/publisher/f2429cd1-4d45-475c-852a-892024cb4aba[ARC - Arctic Research and Consulting DA]",{CONSULTING},{NO},1,8 914,63 - -# save as csv -tt %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark="\u202F")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark="\u202F")) %>% -mutate(`Data citations` = trimws(format(`Data citations`, nsmall=0, big.mark="\u202F")),which ="left") %>% -mutate(`Activity sector` = paste0("{",`Activity sector`,"}")) %>% -mutate(`Country` = paste0("{",`Country`,"}")) %>% -write.table(file = "250-private-sector-table-test.csv", row.names = FALSE, col.names = FALSE, sep = ",", quote = FALSE) - -# save .adoc -# save_file_table = "250-private-sector-table.adoc" - -# save and more cleanup -# sink(file = save_file_table, type = "output") -# tt %>% -# mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -# mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -# mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -# ascii::ascii(include.rownames = FALSE, digits = 0) -# sink() - -# totals table -# save_file_totals = "260-private-sector-totals.adoc" - -# sink(file = save_file_totals, type = "output") -# tt %>% -# summarise( -# Datasets = sum(Datasets), -# `Occurrence records` = sum(`Occurrence records`), -# `Data citations` = sum(`Data citations`) -# ) %>% -# mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -# mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -# mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -# ascii::ascii(include.rownames = FALSE, digits = 0) -# sink() diff --git a/.history/build-table-script/R/table_20240704123007.R b/.history/build-table-script/R/table_20240704123007.R deleted file mode 100644 index 82b6ea4..0000000 --- a/.history/build-table-script/R/table_20240704123007.R +++ /dev/null @@ -1,106 +0,0 @@ - -library(dplyr) -library(purrr) -library(gbifmt) # my library -# setwd("C:/Users/ftw712/Desktop/doc-private-sector-data-publishing/") - -# harvest publishers and datasets private sector publishers directly from machineTags - -# dataset -ds_mt = get_mt("privateSector.gbif.org",type="dataset",limit=500) |> -mutate(link = paste0("https://www.gbif.org/dataset/",uuid)) %>% -mutate(pd = "dataset") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -# publisher -pb_mt = get_mt("privateSector.gbif.org",type="organization",limit=500) |> -mutate(link = paste0("https://www.gbif.org/publisher/",uuid)) %>% -mutate(pd = "publisher") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -ss = list(ds_mt,pb_mt) |> -bind_rows() |> -glimpse() - -# don't need to read from source anymore since we read from machine tags -# ss = readr::read_csv("build-table-script/data/source.tsv") %>% -# mutate(pd = ifelse(grepl("publisher",link),"publisher","dataset")) %>% -# mutate(key = gsub("https://www.gbif.org/publisher/","",link)) %>% -# mutate(key = gsub("https://www.gbif.org/dataset/","",key)) %>% -# glimpse() - -gbif_country = rgbif::enumeration_country() %>% select(Country=title,iso2) %>% glimpse() - -pp = ss %>% -dplyr::filter(pd == "publisher") %>% -select(key,`Activity sector`) |> -mutate(name = map_chr(key,~rgbif::organizations(uuid=.x,limit=1)$data$title)) |> -mutate(`Occurrence records` = map_dbl(key,~ rgbif::occ_search(publishingOrg = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = map_dbl(key,~rgbif::dataset_search(publishingOrg= .x,limit=0)$meta$count)) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(publishingOrg = .x))) %>% -mutate(Company = paste0("https://www.gbif.org/publisher/",key,"[",name,"]")) %>% -mutate(iso2 = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -glimpse() - -dd = ss %>% -dplyr::filter(pd == "dataset") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::dataset_get(uuid=.x)$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ -rgbif::occ_search(datasetKey = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = 1) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(datasetKey = .x))) %>% -mutate(Company = paste0("(https://www.gbif.org/dataset/",key,")[",name,"]")) %>% -mutate(p_key = map_chr(key,~ rgbif::dataset_get(uuid=.x)$publishingOrganizationKey)) %>% -mutate(iso2 = map_chr(p_key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -select(-p_key) %>% -glimpse() - -# combine tables and clean up -tt = rbind(pp,dd) %>% -arrange(name) %>% -select(Company, `Activity sector`, Country = iso2, Datasets, `Occurrence records`, `Data citations`) - - -# "https://www.gbif.org/publisher/f2429cd1-4d45-475c-852a-892024cb4aba[ARC - Arctic Research and Consulting DA]",{CONSULTING},{NO},1,8 914,63 - -# save as csv -tt %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark="\u202F")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark="\u202F")) %>% -mutate(`Data citations` = trimws(format(`Data citations`, nsmall=0, big.mark="\u202F"),which ="left"))) %>% -mutate(`Activity sector` = paste0("{",`Activity sector`,"}")) %>% -mutate(`Country` = paste0("{",`Country`,"}")) %>% -write.table(file = "250-private-sector-table-test.csv", row.names = FALSE, col.names = FALSE, sep = ",", quote = FALSE) - -# save .adoc -# save_file_table = "250-private-sector-table.adoc" - -# save and more cleanup -# sink(file = save_file_table, type = "output") -# tt %>% -# mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -# mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -# mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -# ascii::ascii(include.rownames = FALSE, digits = 0) -# sink() - -# totals table -# save_file_totals = "260-private-sector-totals.adoc" - -# sink(file = save_file_totals, type = "output") -# tt %>% -# summarise( -# Datasets = sum(Datasets), -# `Occurrence records` = sum(`Occurrence records`), -# `Data citations` = sum(`Data citations`) -# ) %>% -# mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -# mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -# mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -# ascii::ascii(include.rownames = FALSE, digits = 0) -# sink() diff --git a/.history/build-table-script/R/table_20240704123011.R b/.history/build-table-script/R/table_20240704123011.R deleted file mode 100644 index 23863d1..0000000 --- a/.history/build-table-script/R/table_20240704123011.R +++ /dev/null @@ -1,106 +0,0 @@ - -library(dplyr) -library(purrr) -library(gbifmt) # my library -# setwd("C:/Users/ftw712/Desktop/doc-private-sector-data-publishing/") - -# harvest publishers and datasets private sector publishers directly from machineTags - -# dataset -ds_mt = get_mt("privateSector.gbif.org",type="dataset",limit=500) |> -mutate(link = paste0("https://www.gbif.org/dataset/",uuid)) %>% -mutate(pd = "dataset") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -# publisher -pb_mt = get_mt("privateSector.gbif.org",type="organization",limit=500) |> -mutate(link = paste0("https://www.gbif.org/publisher/",uuid)) %>% -mutate(pd = "publisher") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -ss = list(ds_mt,pb_mt) |> -bind_rows() |> -glimpse() - -# don't need to read from source anymore since we read from machine tags -# ss = readr::read_csv("build-table-script/data/source.tsv") %>% -# mutate(pd = ifelse(grepl("publisher",link),"publisher","dataset")) %>% -# mutate(key = gsub("https://www.gbif.org/publisher/","",link)) %>% -# mutate(key = gsub("https://www.gbif.org/dataset/","",key)) %>% -# glimpse() - -gbif_country = rgbif::enumeration_country() %>% select(Country=title,iso2) %>% glimpse() - -pp = ss %>% -dplyr::filter(pd == "publisher") %>% -select(key,`Activity sector`) |> -mutate(name = map_chr(key,~rgbif::organizations(uuid=.x,limit=1)$data$title)) |> -mutate(`Occurrence records` = map_dbl(key,~ rgbif::occ_search(publishingOrg = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = map_dbl(key,~rgbif::dataset_search(publishingOrg= .x,limit=0)$meta$count)) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(publishingOrg = .x))) %>% -mutate(Company = paste0("https://www.gbif.org/publisher/",key,"[",name,"]")) %>% -mutate(iso2 = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -glimpse() - -dd = ss %>% -dplyr::filter(pd == "dataset") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::dataset_get(uuid=.x)$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ -rgbif::occ_search(datasetKey = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = 1) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(datasetKey = .x))) %>% -mutate(Company = paste0("(https://www.gbif.org/dataset/",key,")[",name,"]")) %>% -mutate(p_key = map_chr(key,~ rgbif::dataset_get(uuid=.x)$publishingOrganizationKey)) %>% -mutate(iso2 = map_chr(p_key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -select(-p_key) %>% -glimpse() - -# combine tables and clean up -tt = rbind(pp,dd) %>% -arrange(name) %>% -select(Company, `Activity sector`, Country = iso2, Datasets, `Occurrence records`, `Data citations`) - - -# "https://www.gbif.org/publisher/f2429cd1-4d45-475c-852a-892024cb4aba[ARC - Arctic Research and Consulting DA]",{CONSULTING},{NO},1,8 914,63 - -# save as csv -tt %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark="\u202F")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark="\u202F")) %>% -mutate(`Data citations` = trimws(format(`Data citations`, nsmall=0, big.mark="\u202F"),which ="left")) %>% -mutate(`Activity sector` = paste0("{",`Activity sector`,"}")) %>% -mutate(`Country` = paste0("{",`Country`,"}")) %>% -write.table(file = "250-private-sector-table-test.csv", row.names = FALSE, col.names = FALSE, sep = ",", quote = FALSE) - -# save .adoc -# save_file_table = "250-private-sector-table.adoc" - -# save and more cleanup -# sink(file = save_file_table, type = "output") -# tt %>% -# mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -# mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -# mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -# ascii::ascii(include.rownames = FALSE, digits = 0) -# sink() - -# totals table -# save_file_totals = "260-private-sector-totals.adoc" - -# sink(file = save_file_totals, type = "output") -# tt %>% -# summarise( -# Datasets = sum(Datasets), -# `Occurrence records` = sum(`Occurrence records`), -# `Data citations` = sum(`Data citations`) -# ) %>% -# mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -# mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -# mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -# ascii::ascii(include.rownames = FALSE, digits = 0) -# sink() diff --git a/.history/build-table-script/R/table_20240704123013.R b/.history/build-table-script/R/table_20240704123013.R deleted file mode 100644 index 9658cad..0000000 --- a/.history/build-table-script/R/table_20240704123013.R +++ /dev/null @@ -1,106 +0,0 @@ - -library(dplyr) -library(purrr) -library(gbifmt) # my library -# setwd("C:/Users/ftw712/Desktop/doc-private-sector-data-publishing/") - -# harvest publishers and datasets private sector publishers directly from machineTags - -# dataset -ds_mt = get_mt("privateSector.gbif.org",type="dataset",limit=500) |> -mutate(link = paste0("https://www.gbif.org/dataset/",uuid)) %>% -mutate(pd = "dataset") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -# publisher -pb_mt = get_mt("privateSector.gbif.org",type="organization",limit=500) |> -mutate(link = paste0("https://www.gbif.org/publisher/",uuid)) %>% -mutate(pd = "publisher") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -ss = list(ds_mt,pb_mt) |> -bind_rows() |> -glimpse() - -# don't need to read from source anymore since we read from machine tags -# ss = readr::read_csv("build-table-script/data/source.tsv") %>% -# mutate(pd = ifelse(grepl("publisher",link),"publisher","dataset")) %>% -# mutate(key = gsub("https://www.gbif.org/publisher/","",link)) %>% -# mutate(key = gsub("https://www.gbif.org/dataset/","",key)) %>% -# glimpse() - -gbif_country = rgbif::enumeration_country() %>% select(Country=title,iso2) %>% glimpse() - -pp = ss %>% -dplyr::filter(pd == "publisher") %>% -select(key,`Activity sector`) |> -mutate(name = map_chr(key,~rgbif::organizations(uuid=.x,limit=1)$data$title)) |> -mutate(`Occurrence records` = map_dbl(key,~ rgbif::occ_search(publishingOrg = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = map_dbl(key,~rgbif::dataset_search(publishingOrg= .x,limit=0)$meta$count)) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(publishingOrg = .x))) %>% -mutate(Company = paste0("https://www.gbif.org/publisher/",key,"[",name,"]")) %>% -mutate(iso2 = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -glimpse() - -dd = ss %>% -dplyr::filter(pd == "dataset") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::dataset_get(uuid=.x)$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ -rgbif::occ_search(datasetKey = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = 1) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(datasetKey = .x))) %>% -mutate(Company = paste0("(https://www.gbif.org/dataset/",key,")[",name,"]")) %>% -mutate(p_key = map_chr(key,~ rgbif::dataset_get(uuid=.x)$publishingOrganizationKey)) %>% -mutate(iso2 = map_chr(p_key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -select(-p_key) %>% -glimpse() - -# combine tables and clean up -tt = rbind(pp,dd) %>% -arrange(name) %>% -select(Company, `Activity sector`, Country = iso2, Datasets, `Occurrence records`, `Data citations`) - - -# "https://www.gbif.org/publisher/f2429cd1-4d45-475c-852a-892024cb4aba[ARC - Arctic Research and Consulting DA]",{CONSULTING},{NO},1,8 914,63 - -# save as csv -tt %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark="\u202F")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark="\u202F")) %>% -mutate(`Data citations` = trimws(format(`Data citations`, nsmall=0, big.mark="\u202F"),which ="left") %>% -mutate(`Activity sector` = paste0("{",`Activity sector`,"}")) %>% -mutate(`Country` = paste0("{",`Country`,"}")) %>% -write.table(file = "250-private-sector-table-test.csv", row.names = FALSE, col.names = FALSE, sep = ",", quote = FALSE) - -# save .adoc -# save_file_table = "250-private-sector-table.adoc" - -# save and more cleanup -# sink(file = save_file_table, type = "output") -# tt %>% -# mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -# mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -# mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -# ascii::ascii(include.rownames = FALSE, digits = 0) -# sink() - -# totals table -# save_file_totals = "260-private-sector-totals.adoc" - -# sink(file = save_file_totals, type = "output") -# tt %>% -# summarise( -# Datasets = sum(Datasets), -# `Occurrence records` = sum(`Occurrence records`), -# `Data citations` = sum(`Data citations`) -# ) %>% -# mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -# mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -# mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -# ascii::ascii(include.rownames = FALSE, digits = 0) -# sink() diff --git a/.history/build-table-script/R/table_20240704123019.R b/.history/build-table-script/R/table_20240704123019.R deleted file mode 100644 index 9658cad..0000000 --- a/.history/build-table-script/R/table_20240704123019.R +++ /dev/null @@ -1,106 +0,0 @@ - -library(dplyr) -library(purrr) -library(gbifmt) # my library -# setwd("C:/Users/ftw712/Desktop/doc-private-sector-data-publishing/") - -# harvest publishers and datasets private sector publishers directly from machineTags - -# dataset -ds_mt = get_mt("privateSector.gbif.org",type="dataset",limit=500) |> -mutate(link = paste0("https://www.gbif.org/dataset/",uuid)) %>% -mutate(pd = "dataset") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -# publisher -pb_mt = get_mt("privateSector.gbif.org",type="organization",limit=500) |> -mutate(link = paste0("https://www.gbif.org/publisher/",uuid)) %>% -mutate(pd = "publisher") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -ss = list(ds_mt,pb_mt) |> -bind_rows() |> -glimpse() - -# don't need to read from source anymore since we read from machine tags -# ss = readr::read_csv("build-table-script/data/source.tsv") %>% -# mutate(pd = ifelse(grepl("publisher",link),"publisher","dataset")) %>% -# mutate(key = gsub("https://www.gbif.org/publisher/","",link)) %>% -# mutate(key = gsub("https://www.gbif.org/dataset/","",key)) %>% -# glimpse() - -gbif_country = rgbif::enumeration_country() %>% select(Country=title,iso2) %>% glimpse() - -pp = ss %>% -dplyr::filter(pd == "publisher") %>% -select(key,`Activity sector`) |> -mutate(name = map_chr(key,~rgbif::organizations(uuid=.x,limit=1)$data$title)) |> -mutate(`Occurrence records` = map_dbl(key,~ rgbif::occ_search(publishingOrg = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = map_dbl(key,~rgbif::dataset_search(publishingOrg= .x,limit=0)$meta$count)) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(publishingOrg = .x))) %>% -mutate(Company = paste0("https://www.gbif.org/publisher/",key,"[",name,"]")) %>% -mutate(iso2 = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -glimpse() - -dd = ss %>% -dplyr::filter(pd == "dataset") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::dataset_get(uuid=.x)$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ -rgbif::occ_search(datasetKey = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = 1) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(datasetKey = .x))) %>% -mutate(Company = paste0("(https://www.gbif.org/dataset/",key,")[",name,"]")) %>% -mutate(p_key = map_chr(key,~ rgbif::dataset_get(uuid=.x)$publishingOrganizationKey)) %>% -mutate(iso2 = map_chr(p_key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -select(-p_key) %>% -glimpse() - -# combine tables and clean up -tt = rbind(pp,dd) %>% -arrange(name) %>% -select(Company, `Activity sector`, Country = iso2, Datasets, `Occurrence records`, `Data citations`) - - -# "https://www.gbif.org/publisher/f2429cd1-4d45-475c-852a-892024cb4aba[ARC - Arctic Research and Consulting DA]",{CONSULTING},{NO},1,8 914,63 - -# save as csv -tt %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark="\u202F")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark="\u202F")) %>% -mutate(`Data citations` = trimws(format(`Data citations`, nsmall=0, big.mark="\u202F"),which ="left") %>% -mutate(`Activity sector` = paste0("{",`Activity sector`,"}")) %>% -mutate(`Country` = paste0("{",`Country`,"}")) %>% -write.table(file = "250-private-sector-table-test.csv", row.names = FALSE, col.names = FALSE, sep = ",", quote = FALSE) - -# save .adoc -# save_file_table = "250-private-sector-table.adoc" - -# save and more cleanup -# sink(file = save_file_table, type = "output") -# tt %>% -# mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -# mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -# mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -# ascii::ascii(include.rownames = FALSE, digits = 0) -# sink() - -# totals table -# save_file_totals = "260-private-sector-totals.adoc" - -# sink(file = save_file_totals, type = "output") -# tt %>% -# summarise( -# Datasets = sum(Datasets), -# `Occurrence records` = sum(`Occurrence records`), -# `Data citations` = sum(`Data citations`) -# ) %>% -# mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -# mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -# mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -# ascii::ascii(include.rownames = FALSE, digits = 0) -# sink() diff --git a/.history/build-table-script/R/table_20240704123025.R b/.history/build-table-script/R/table_20240704123025.R deleted file mode 100644 index 23863d1..0000000 --- a/.history/build-table-script/R/table_20240704123025.R +++ /dev/null @@ -1,106 +0,0 @@ - -library(dplyr) -library(purrr) -library(gbifmt) # my library -# setwd("C:/Users/ftw712/Desktop/doc-private-sector-data-publishing/") - -# harvest publishers and datasets private sector publishers directly from machineTags - -# dataset -ds_mt = get_mt("privateSector.gbif.org",type="dataset",limit=500) |> -mutate(link = paste0("https://www.gbif.org/dataset/",uuid)) %>% -mutate(pd = "dataset") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -# publisher -pb_mt = get_mt("privateSector.gbif.org",type="organization",limit=500) |> -mutate(link = paste0("https://www.gbif.org/publisher/",uuid)) %>% -mutate(pd = "publisher") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -ss = list(ds_mt,pb_mt) |> -bind_rows() |> -glimpse() - -# don't need to read from source anymore since we read from machine tags -# ss = readr::read_csv("build-table-script/data/source.tsv") %>% -# mutate(pd = ifelse(grepl("publisher",link),"publisher","dataset")) %>% -# mutate(key = gsub("https://www.gbif.org/publisher/","",link)) %>% -# mutate(key = gsub("https://www.gbif.org/dataset/","",key)) %>% -# glimpse() - -gbif_country = rgbif::enumeration_country() %>% select(Country=title,iso2) %>% glimpse() - -pp = ss %>% -dplyr::filter(pd == "publisher") %>% -select(key,`Activity sector`) |> -mutate(name = map_chr(key,~rgbif::organizations(uuid=.x,limit=1)$data$title)) |> -mutate(`Occurrence records` = map_dbl(key,~ rgbif::occ_search(publishingOrg = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = map_dbl(key,~rgbif::dataset_search(publishingOrg= .x,limit=0)$meta$count)) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(publishingOrg = .x))) %>% -mutate(Company = paste0("https://www.gbif.org/publisher/",key,"[",name,"]")) %>% -mutate(iso2 = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -glimpse() - -dd = ss %>% -dplyr::filter(pd == "dataset") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::dataset_get(uuid=.x)$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ -rgbif::occ_search(datasetKey = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = 1) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(datasetKey = .x))) %>% -mutate(Company = paste0("(https://www.gbif.org/dataset/",key,")[",name,"]")) %>% -mutate(p_key = map_chr(key,~ rgbif::dataset_get(uuid=.x)$publishingOrganizationKey)) %>% -mutate(iso2 = map_chr(p_key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -select(-p_key) %>% -glimpse() - -# combine tables and clean up -tt = rbind(pp,dd) %>% -arrange(name) %>% -select(Company, `Activity sector`, Country = iso2, Datasets, `Occurrence records`, `Data citations`) - - -# "https://www.gbif.org/publisher/f2429cd1-4d45-475c-852a-892024cb4aba[ARC - Arctic Research and Consulting DA]",{CONSULTING},{NO},1,8 914,63 - -# save as csv -tt %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark="\u202F")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark="\u202F")) %>% -mutate(`Data citations` = trimws(format(`Data citations`, nsmall=0, big.mark="\u202F"),which ="left")) %>% -mutate(`Activity sector` = paste0("{",`Activity sector`,"}")) %>% -mutate(`Country` = paste0("{",`Country`,"}")) %>% -write.table(file = "250-private-sector-table-test.csv", row.names = FALSE, col.names = FALSE, sep = ",", quote = FALSE) - -# save .adoc -# save_file_table = "250-private-sector-table.adoc" - -# save and more cleanup -# sink(file = save_file_table, type = "output") -# tt %>% -# mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -# mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -# mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -# ascii::ascii(include.rownames = FALSE, digits = 0) -# sink() - -# totals table -# save_file_totals = "260-private-sector-totals.adoc" - -# sink(file = save_file_totals, type = "output") -# tt %>% -# summarise( -# Datasets = sum(Datasets), -# `Occurrence records` = sum(`Occurrence records`), -# `Data citations` = sum(`Data citations`) -# ) %>% -# mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -# mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -# mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -# ascii::ascii(include.rownames = FALSE, digits = 0) -# sink() diff --git a/.history/build-table-script/R/table_20240704123153.R b/.history/build-table-script/R/table_20240704123153.R deleted file mode 100644 index 8b5cd91..0000000 --- a/.history/build-table-script/R/table_20240704123153.R +++ /dev/null @@ -1,106 +0,0 @@ - -library(dplyr) -library(purrr) -library(gbifmt) # my library -# setwd("C:/Users/ftw712/Desktop/doc-private-sector-data-publishing/") - -# harvest publishers and datasets private sector publishers directly from machineTags - -# dataset -ds_mt = get_mt("privateSector.gbif.org",type="dataset",limit=500) |> -mutate(link = paste0("https://www.gbif.org/dataset/",uuid)) %>% -mutate(pd = "dataset") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -# publisher -pb_mt = get_mt("privateSector.gbif.org",type="organization",limit=500) |> -mutate(link = paste0("https://www.gbif.org/publisher/",uuid)) %>% -mutate(pd = "publisher") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -ss = list(ds_mt,pb_mt) |> -bind_rows() |> -glimpse() - -# don't need to read from source anymore since we read from machine tags -# ss = readr::read_csv("build-table-script/data/source.tsv") %>% -# mutate(pd = ifelse(grepl("publisher",link),"publisher","dataset")) %>% -# mutate(key = gsub("https://www.gbif.org/publisher/","",link)) %>% -# mutate(key = gsub("https://www.gbif.org/dataset/","",key)) %>% -# glimpse() - -gbif_country = rgbif::enumeration_country() %>% select(Country=title,iso2) %>% glimpse() - -pp = ss %>% -dplyr::filter(pd == "publisher") %>% -select(key,`Activity sector`) |> -mutate(name = map_chr(key,~rgbif::organizations(uuid=.x,limit=1)$data$title)) |> -mutate(`Occurrence records` = map_dbl(key,~ rgbif::occ_search(publishingOrg = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = map_dbl(key,~rgbif::dataset_search(publishingOrg= .x,limit=0)$meta$count)) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(publishingOrg = .x))) %>% -mutate(Company = paste0("https://www.gbif.org/publisher/",key,"[",name,"]")) %>% -mutate(iso2 = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -glimpse() - -dd = ss %>% -dplyr::filter(pd == "dataset") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::dataset_get(uuid=.x)$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ -rgbif::occ_search(datasetKey = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = 1) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(datasetKey = .x))) %>% -mutate(Company = paste0("(https://www.gbif.org/dataset/",key,")[",name,"]")) %>% -mutate(p_key = map_chr(key,~ rgbif::dataset_get(uuid=.x)$publishingOrganizationKey)) %>% -mutate(iso2 = map_chr(p_key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -select(-p_key) %>% -glimpse() - -# combine tables and clean up -tt = rbind(pp,dd) %>% -arrange(name) %>% -select(Company, `Activity sector`, Country = iso2, Datasets, `Occurrence records`, `Data citations`) - - -# "https://www.gbif.org/publisher/f2429cd1-4d45-475c-852a-892024cb4aba[ARC - Arctic Research and Consulting DA]",{CONSULTING},{NO},1,8 914,63 - -# save as csv -tt %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark="\u202F")) %>% -mutate(`Occurrence records` = trimws(format(`Occurrence records`, nsmall=0, big.mark="\u202F"),which ="left")) %>% -mutate(`Data citations` = trimws(format(`Data citations`, nsmall=0, big.mark="\u202F"),which ="left")) %>% -mutate(`Activity sector` = paste0("{",`Activity sector`,"}")) %>% -mutate(`Country` = paste0("{",`Country`,"}")) %>% -write.table(file = "250-private-sector-table-test.csv", row.names = FALSE, col.names = FALSE, sep = ",", quote = FALSE) - -# save .adoc -# save_file_table = "250-private-sector-table.adoc" - -# save and more cleanup -# sink(file = save_file_table, type = "output") -# tt %>% -# mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -# mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -# mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -# ascii::ascii(include.rownames = FALSE, digits = 0) -# sink() - -# totals table -# save_file_totals = "260-private-sector-totals.adoc" - -# sink(file = save_file_totals, type = "output") -# tt %>% -# summarise( -# Datasets = sum(Datasets), -# `Occurrence records` = sum(`Occurrence records`), -# `Data citations` = sum(`Data citations`) -# ) %>% -# mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -# mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -# mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -# ascii::ascii(include.rownames = FALSE, digits = 0) -# sink() diff --git a/.history/build-table-script/R/table_20240704123229.R b/.history/build-table-script/R/table_20240704123229.R deleted file mode 100644 index a4b7c11..0000000 --- a/.history/build-table-script/R/table_20240704123229.R +++ /dev/null @@ -1,99 +0,0 @@ - -library(dplyr) -library(purrr) -library(gbifmt) # my library -# setwd("C:/Users/ftw712/Desktop/doc-private-sector-data-publishing/") - -# harvest publishers and datasets private sector publishers directly from machineTags - -# dataset -ds_mt = get_mt("privateSector.gbif.org",type="dataset",limit=500) |> -mutate(link = paste0("https://www.gbif.org/dataset/",uuid)) %>% -mutate(pd = "dataset") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -# publisher -pb_mt = get_mt("privateSector.gbif.org",type="organization",limit=500) |> -mutate(link = paste0("https://www.gbif.org/publisher/",uuid)) %>% -mutate(pd = "publisher") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -ss = list(ds_mt,pb_mt) |> -bind_rows() |> -glimpse() - -gbif_country = rgbif::enumeration_country() %>% select(Country=title,iso2) %>% glimpse() - -pp = ss %>% -dplyr::filter(pd == "publisher") %>% -select(key,`Activity sector`) |> -mutate(name = map_chr(key,~rgbif::organizations(uuid=.x,limit=1)$data$title)) |> -mutate(`Occurrence records` = map_dbl(key,~ rgbif::occ_search(publishingOrg = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = map_dbl(key,~rgbif::dataset_search(publishingOrg= .x,limit=0)$meta$count)) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(publishingOrg = .x))) %>% -mutate(Company = paste0("https://www.gbif.org/publisher/",key,"[",name,"]")) %>% -mutate(iso2 = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -glimpse() - -dd = ss %>% -dplyr::filter(pd == "dataset") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::dataset_get(uuid=.x)$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ -rgbif::occ_search(datasetKey = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = 1) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(datasetKey = .x))) %>% -mutate(Company = paste0("(https://www.gbif.org/dataset/",key,")[",name,"]")) %>% -mutate(p_key = map_chr(key,~ rgbif::dataset_get(uuid=.x)$publishingOrganizationKey)) %>% -mutate(iso2 = map_chr(p_key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -select(-p_key) %>% -glimpse() - -# combine tables and clean up -tt = rbind(pp,dd) %>% -arrange(name) %>% -select(Company, `Activity sector`, Country = iso2, Datasets, `Occurrence records`, `Data citations`) - - -# "https://www.gbif.org/publisher/f2429cd1-4d45-475c-852a-892024cb4aba[ARC - Arctic Research and Consulting DA]",{CONSULTING},{NO},1,8 914,63 - -# save as csv -tt %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark="\u202F")) %>% -mutate(`Occurrence records` = trimws(format(`Occurrence records`, nsmall=0, big.mark="\u202F"),which ="left")) %>% -mutate(`Data citations` = trimws(format(`Data citations`, nsmall=0, big.mark="\u202F"),which ="left")) %>% -mutate(`Activity sector` = paste0("{",`Activity sector`,"}")) %>% -mutate(`Country` = paste0("{",`Country`,"}")) %>% -write.table(file = "250-private-sector-table-test.csv", row.names = FALSE, col.names = FALSE, sep = ",", quote = FALSE) - -# save .adoc -# save_file_table = "250-private-sector-table.adoc" - -# save and more cleanup -# sink(file = save_file_table, type = "output") -# tt %>% -# mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -# mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -# mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -# ascii::ascii(include.rownames = FALSE, digits = 0) -# sink() - -# totals table -# save_file_totals = "260-private-sector-totals.adoc" - -# sink(file = save_file_totals, type = "output") -# tt %>% -# summarise( -# Datasets = sum(Datasets), -# `Occurrence records` = sum(`Occurrence records`), -# `Data citations` = sum(`Data citations`) -# ) %>% -# mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -# mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -# mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -# ascii::ascii(include.rownames = FALSE, digits = 0) -# sink() diff --git a/.history/build-table-script/R/table_20240704123242.R b/.history/build-table-script/R/table_20240704123242.R deleted file mode 100644 index 70c1a10..0000000 --- a/.history/build-table-script/R/table_20240704123242.R +++ /dev/null @@ -1,100 +0,0 @@ - -library(dplyr) -library(purrr) -library(gbifmt) # my library -# setwd("C:/Users/ftw712/Desktop/doc-private-sector-data-publishing/") - -# harvest publishers and datasets private sector publishers directly from machineTags - -# dataset -ds_mt = get_mt("privateSector.gbif.org",type="dataset",limit=500) |> -mutate(link = paste0("https://www.gbif.org/dataset/",uuid)) %>% -mutate(pd = "dataset") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -# publisher -pb_mt = get_mt("privateSector.gbif.org",type="organization",limit=500) |> -mutate(link = paste0("https://www.gbif.org/publisher/",uuid)) %>% -mutate(pd = "publisher") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -# combine -ss = list(ds_mt,pb_mt) |> -bind_rows() |> -glimpse() - -gbif_country = rgbif::enumeration_country() %>% select(Country=title,iso2) %>% glimpse() - -pp = ss %>% -dplyr::filter(pd == "publisher") %>% -select(key,`Activity sector`) |> -mutate(name = map_chr(key,~rgbif::organizations(uuid=.x,limit=1)$data$title)) |> -mutate(`Occurrence records` = map_dbl(key,~ rgbif::occ_search(publishingOrg = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = map_dbl(key,~rgbif::dataset_search(publishingOrg= .x,limit=0)$meta$count)) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(publishingOrg = .x))) %>% -mutate(Company = paste0("https://www.gbif.org/publisher/",key,"[",name,"]")) %>% -mutate(iso2 = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -glimpse() - -dd = ss %>% -dplyr::filter(pd == "dataset") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::dataset_get(uuid=.x)$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ -rgbif::occ_search(datasetKey = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = 1) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(datasetKey = .x))) %>% -mutate(Company = paste0("(https://www.gbif.org/dataset/",key,")[",name,"]")) %>% -mutate(p_key = map_chr(key,~ rgbif::dataset_get(uuid=.x)$publishingOrganizationKey)) %>% -mutate(iso2 = map_chr(p_key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -select(-p_key) %>% -glimpse() - -# combine tables and clean up -tt = rbind(pp,dd) %>% -arrange(name) %>% -select(Company, `Activity sector`, Country = iso2, Datasets, `Occurrence records`, `Data citations`) - - -# "https://www.gbif.org/publisher/f2429cd1-4d45-475c-852a-892024cb4aba[ARC - Arctic Research and Consulting DA]",{CONSULTING},{NO},1,8 914,63 - -# save as csv -tt %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark="\u202F")) %>% -mutate(`Occurrence records` = trimws(format(`Occurrence records`, nsmall=0, big.mark="\u202F"),which ="left")) %>% -mutate(`Data citations` = trimws(format(`Data citations`, nsmall=0, big.mark="\u202F"),which ="left")) %>% -mutate(`Activity sector` = paste0("{",`Activity sector`,"}")) %>% -mutate(`Country` = paste0("{",`Country`,"}")) %>% -write.table(file = "250-private-sector-table-test.csv", row.names = FALSE, col.names = FALSE, sep = ",", quote = FALSE) - -# save .adoc -# save_file_table = "250-private-sector-table.adoc" - -# save and more cleanup -# sink(file = save_file_table, type = "output") -# tt %>% -# mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -# mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -# mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -# ascii::ascii(include.rownames = FALSE, digits = 0) -# sink() - -# totals table -# save_file_totals = "260-private-sector-totals.adoc" - -# sink(file = save_file_totals, type = "output") -# tt %>% -# summarise( -# Datasets = sum(Datasets), -# `Occurrence records` = sum(`Occurrence records`), -# `Data citations` = sum(`Data citations`) -# ) %>% -# mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -# mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -# mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -# ascii::ascii(include.rownames = FALSE, digits = 0) -# sink() diff --git a/.history/build-table-script/R/table_20240704123252.R b/.history/build-table-script/R/table_20240704123252.R deleted file mode 100644 index c428694..0000000 --- a/.history/build-table-script/R/table_20240704123252.R +++ /dev/null @@ -1,100 +0,0 @@ - -library(dplyr) -library(purrr) -library(gbifmt) # my library -# setwd("C:/Users/ftw712/Desktop/doc-private-sector-data-publishing/") - -# harvest publishers and datasets private sector publishers directly from machineTags - -# dataset machine tags -ds_mt = get_mt("privateSector.gbif.org",type="dataset",limit=500) |> -mutate(link = paste0("https://www.gbif.org/dataset/",uuid)) %>% -mutate(pd = "dataset") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -# publisher machine tags -pb_mt = get_mt("privateSector.gbif.org",type="organization",limit=500) |> -mutate(link = paste0("https://www.gbif.org/publisher/",uuid)) %>% -mutate(pd = "publisher") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -# combine -ss = list(ds_mt,pb_mt) |> -bind_rows() |> -glimpse() - -gbif_country = rgbif::enumeration_country() %>% select(Country=title,iso2) %>% glimpse() - -pp = ss %>% -dplyr::filter(pd == "publisher") %>% -select(key,`Activity sector`) |> -mutate(name = map_chr(key,~rgbif::organizations(uuid=.x,limit=1)$data$title)) |> -mutate(`Occurrence records` = map_dbl(key,~ rgbif::occ_search(publishingOrg = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = map_dbl(key,~rgbif::dataset_search(publishingOrg= .x,limit=0)$meta$count)) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(publishingOrg = .x))) %>% -mutate(Company = paste0("https://www.gbif.org/publisher/",key,"[",name,"]")) %>% -mutate(iso2 = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -glimpse() - -dd = ss %>% -dplyr::filter(pd == "dataset") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::dataset_get(uuid=.x)$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ -rgbif::occ_search(datasetKey = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = 1) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(datasetKey = .x))) %>% -mutate(Company = paste0("(https://www.gbif.org/dataset/",key,")[",name,"]")) %>% -mutate(p_key = map_chr(key,~ rgbif::dataset_get(uuid=.x)$publishingOrganizationKey)) %>% -mutate(iso2 = map_chr(p_key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -select(-p_key) %>% -glimpse() - -# combine tables and clean up -tt = rbind(pp,dd) %>% -arrange(name) %>% -select(Company, `Activity sector`, Country = iso2, Datasets, `Occurrence records`, `Data citations`) - - -# "https://www.gbif.org/publisher/f2429cd1-4d45-475c-852a-892024cb4aba[ARC - Arctic Research and Consulting DA]",{CONSULTING},{NO},1,8 914,63 - -# save as csv -tt %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark="\u202F")) %>% -mutate(`Occurrence records` = trimws(format(`Occurrence records`, nsmall=0, big.mark="\u202F"),which ="left")) %>% -mutate(`Data citations` = trimws(format(`Data citations`, nsmall=0, big.mark="\u202F"),which ="left")) %>% -mutate(`Activity sector` = paste0("{",`Activity sector`,"}")) %>% -mutate(`Country` = paste0("{",`Country`,"}")) %>% -write.table(file = "250-private-sector-table-test.csv", row.names = FALSE, col.names = FALSE, sep = ",", quote = FALSE) - -# save .adoc -# save_file_table = "250-private-sector-table.adoc" - -# save and more cleanup -# sink(file = save_file_table, type = "output") -# tt %>% -# mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -# mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -# mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -# ascii::ascii(include.rownames = FALSE, digits = 0) -# sink() - -# totals table -# save_file_totals = "260-private-sector-totals.adoc" - -# sink(file = save_file_totals, type = "output") -# tt %>% -# summarise( -# Datasets = sum(Datasets), -# `Occurrence records` = sum(`Occurrence records`), -# `Data citations` = sum(`Data citations`) -# ) %>% -# mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -# mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -# mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -# ascii::ascii(include.rownames = FALSE, digits = 0) -# sink() diff --git a/.history/build-table-script/R/table_20240704123438.R b/.history/build-table-script/R/table_20240704123438.R deleted file mode 100644 index 0f87fc8..0000000 --- a/.history/build-table-script/R/table_20240704123438.R +++ /dev/null @@ -1,100 +0,0 @@ - -library(dplyr) -library(purrr) -library(gbifmt) # my library -# setwd("C:/Users/ftw712/Desktop/doc-private-sector-data-publishing/") - -# harvest publishers and datasets private sector publishers directly from machineTags - -# dataset machine tags -ds_mt = get_mt("privateSector.gbif.org",type="dataset",limit=500) |> -mutate(link = paste0("https://www.gbif.org/dataset/",uuid)) %>% -mutate(pd = "dataset") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -# publisher machine tags -pb_mt = get_mt("privateSector.gbif.org",type="organization",limit=500) |> -mutate(link = paste0("https://www.gbif.org/publisher/",uuid)) %>% -mutate(pd = "publisher") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -# combine -ss = list(ds_mt,pb_mt) |> -bind_rows() |> -glimpse() - -gbif_country = rgbif::enumeration_country() %>% select(Country=title,iso2) %>% glimpse() - -pp = ss %>% -dplyr::filter(pd == "publisher") %>% -select(key,`Activity sector`) |> -mutate(name = map_chr(key,~rgbif::organizations(uuid=.x,limit=1)$data$title)) |> -mutate(`Occurrence records` = map_dbl(key,~ rgbif::occ_search(publishingOrg = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = map_dbl(key,~rgbif::dataset_search(publishingOrg= .x,limit=0)$meta$count)) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(publishingOrg = .x))) %>% -mutate(Company = paste0("https://www.gbif.org/publisher/",key,"[",name,"]")) %>% -mutate(iso2 = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -glimpse() - -dd = ss %>% -dplyr::filter(pd == "dataset") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::dataset_get(uuid=.x)$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ -rgbif::occ_search(datasetKey = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = 1) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(datasetKey = .x))) %>% -mutate(Company = paste0("(https://www.gbif.org/dataset/",key,")[",name,"]")) %>% -mutate(p_key = map_chr(key,~ rgbif::dataset_get(uuid=.x)$publishingOrganizationKey)) %>% -mutate(iso2 = map_chr(p_key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -select(-p_key) %>% -glimpse() - -# combine tables and clean up -tt = rbind(pp,dd) %>% -arrange(name) %>% -select(Company, `Activity sector`, Country = iso2, Datasets, `Occurrence records`, `Data citations`) - - -# "https://www.gbif.org/publisher/f2429cd1-4d45-475c-852a-892024cb4aba[ARC - Arctic Research and Consulting DA]",{CONSULTING},{NO},1,8 914,63 - -# save as csv -tt %>% -mutate(`Datasets` = trimws(format(`Datasets`, nsmall=0, big.mark="\u202F"),which ="left")) %>% -mutate(`Occurrence records` = trimws(format(`Occurrence records`, nsmall=0, big.mark="\u202F"),which ="left")) %>% -mutate(`Data citations` = trimws(format(`Data citations`, nsmall=0, big.mark="\u202F"),which ="left")) %>% -mutate(`Activity sector` = paste0("{",`Activity sector`,"}")) %>% -mutate(`Country` = paste0("{",`Country`,"}")) %>% -write.table(file = "250-private-sector-table-test.csv", row.names = FALSE, col.names = FALSE, sep = ",", quote = FALSE) - -# save .adoc -# save_file_table = "250-private-sector-table.adoc" - -# save and more cleanup -# sink(file = save_file_table, type = "output") -# tt %>% -# mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -# mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -# mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -# ascii::ascii(include.rownames = FALSE, digits = 0) -# sink() - -# totals table -# save_file_totals = "260-private-sector-totals.adoc" - -# sink(file = save_file_totals, type = "output") -# tt %>% -# summarise( -# Datasets = sum(Datasets), -# `Occurrence records` = sum(`Occurrence records`), -# `Data citations` = sum(`Data citations`) -# ) %>% -# mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -# mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -# mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -# ascii::ascii(include.rownames = FALSE, digits = 0) -# sink() diff --git a/.history/build-table-script/R/table_20240704123439.R b/.history/build-table-script/R/table_20240704123439.R deleted file mode 100644 index 0f87fc8..0000000 --- a/.history/build-table-script/R/table_20240704123439.R +++ /dev/null @@ -1,100 +0,0 @@ - -library(dplyr) -library(purrr) -library(gbifmt) # my library -# setwd("C:/Users/ftw712/Desktop/doc-private-sector-data-publishing/") - -# harvest publishers and datasets private sector publishers directly from machineTags - -# dataset machine tags -ds_mt = get_mt("privateSector.gbif.org",type="dataset",limit=500) |> -mutate(link = paste0("https://www.gbif.org/dataset/",uuid)) %>% -mutate(pd = "dataset") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -# publisher machine tags -pb_mt = get_mt("privateSector.gbif.org",type="organization",limit=500) |> -mutate(link = paste0("https://www.gbif.org/publisher/",uuid)) %>% -mutate(pd = "publisher") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -# combine -ss = list(ds_mt,pb_mt) |> -bind_rows() |> -glimpse() - -gbif_country = rgbif::enumeration_country() %>% select(Country=title,iso2) %>% glimpse() - -pp = ss %>% -dplyr::filter(pd == "publisher") %>% -select(key,`Activity sector`) |> -mutate(name = map_chr(key,~rgbif::organizations(uuid=.x,limit=1)$data$title)) |> -mutate(`Occurrence records` = map_dbl(key,~ rgbif::occ_search(publishingOrg = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = map_dbl(key,~rgbif::dataset_search(publishingOrg= .x,limit=0)$meta$count)) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(publishingOrg = .x))) %>% -mutate(Company = paste0("https://www.gbif.org/publisher/",key,"[",name,"]")) %>% -mutate(iso2 = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -glimpse() - -dd = ss %>% -dplyr::filter(pd == "dataset") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::dataset_get(uuid=.x)$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ -rgbif::occ_search(datasetKey = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = 1) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(datasetKey = .x))) %>% -mutate(Company = paste0("(https://www.gbif.org/dataset/",key,")[",name,"]")) %>% -mutate(p_key = map_chr(key,~ rgbif::dataset_get(uuid=.x)$publishingOrganizationKey)) %>% -mutate(iso2 = map_chr(p_key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -select(-p_key) %>% -glimpse() - -# combine tables and clean up -tt = rbind(pp,dd) %>% -arrange(name) %>% -select(Company, `Activity sector`, Country = iso2, Datasets, `Occurrence records`, `Data citations`) - - -# "https://www.gbif.org/publisher/f2429cd1-4d45-475c-852a-892024cb4aba[ARC - Arctic Research and Consulting DA]",{CONSULTING},{NO},1,8 914,63 - -# save as csv -tt %>% -mutate(`Datasets` = trimws(format(`Datasets`, nsmall=0, big.mark="\u202F"),which ="left")) %>% -mutate(`Occurrence records` = trimws(format(`Occurrence records`, nsmall=0, big.mark="\u202F"),which ="left")) %>% -mutate(`Data citations` = trimws(format(`Data citations`, nsmall=0, big.mark="\u202F"),which ="left")) %>% -mutate(`Activity sector` = paste0("{",`Activity sector`,"}")) %>% -mutate(`Country` = paste0("{",`Country`,"}")) %>% -write.table(file = "250-private-sector-table-test.csv", row.names = FALSE, col.names = FALSE, sep = ",", quote = FALSE) - -# save .adoc -# save_file_table = "250-private-sector-table.adoc" - -# save and more cleanup -# sink(file = save_file_table, type = "output") -# tt %>% -# mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -# mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -# mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -# ascii::ascii(include.rownames = FALSE, digits = 0) -# sink() - -# totals table -# save_file_totals = "260-private-sector-totals.adoc" - -# sink(file = save_file_totals, type = "output") -# tt %>% -# summarise( -# Datasets = sum(Datasets), -# `Occurrence records` = sum(`Occurrence records`), -# `Data citations` = sum(`Data citations`) -# ) %>% -# mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -# mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -# mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -# ascii::ascii(include.rownames = FALSE, digits = 0) -# sink() diff --git a/.history/build-table-script/R/table_20240704123755.R b/.history/build-table-script/R/table_20240704123755.R deleted file mode 100644 index ed0b72c..0000000 --- a/.history/build-table-script/R/table_20240704123755.R +++ /dev/null @@ -1,72 +0,0 @@ - -library(dplyr) -library(purrr) -library(gbifmt) # my library -# setwd("C:/Users/ftw712/Desktop/doc-private-sector-data-publishing/") - -# harvest publishers and datasets private sector publishers directly from machineTags - -# dataset machine tags -ds_mt = get_mt("privateSector.gbif.org",type="dataset",limit=500) |> -mutate(link = paste0("https://www.gbif.org/dataset/",uuid)) %>% -mutate(pd = "dataset") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -# publisher machine tags -pb_mt = get_mt("privateSector.gbif.org",type="organization",limit=500) |> -mutate(link = paste0("https://www.gbif.org/publisher/",uuid)) %>% -mutate(pd = "publisher") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -# combine -ss = list(ds_mt,pb_mt) |> -bind_rows() |> -glimpse() - -gbif_country = rgbif::enumeration_country() %>% select(Country=title,iso2) %>% glimpse() - -pp = ss %>% -dplyr::filter(pd == "publisher") %>% -select(key,`Activity sector`) |> -mutate(name = map_chr(key,~rgbif::organizations(uuid=.x,limit=1)$data$title)) |> -mutate(`Occurrence records` = map_dbl(key,~ rgbif::occ_search(publishingOrg = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = map_dbl(key,~rgbif::dataset_search(publishingOrg= .x,limit=0)$meta$count)) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(publishingOrg = .x))) %>% -mutate(Company = paste0("https://www.gbif.org/publisher/",key,"[",name,"]")) %>% -mutate(iso2 = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -glimpse() - -dd = ss %>% -dplyr::filter(pd == "dataset") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::dataset_get(uuid=.x)$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ -rgbif::occ_search(datasetKey = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = 1) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(datasetKey = .x))) %>% -mutate(Company = paste0("(https://www.gbif.org/dataset/",key,")[",name,"]")) %>% -mutate(p_key = map_chr(key,~ rgbif::dataset_get(uuid=.x)$publishingOrganizationKey)) %>% -mutate(iso2 = map_chr(p_key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -select(-p_key) %>% -glimpse() - -# combine tables and clean up -tt = rbind(pp,dd) %>% -arrange(name) %>% -select(Company, `Activity sector`, Country = iso2, Datasets, `Occurrence records`, `Data citations`) - - -# "https://www.gbif.org/publisher/f2429cd1-4d45-475c-852a-892024cb4aba[ARC - Arctic Research and Consulting DA]",{CONSULTING},{NO},1,8 914,63 - -# save as csv -tt %>% -mutate(`Datasets` = trimws(format(`Datasets`, nsmall=0, big.mark="\u202F"),which ="left")) %>% -mutate(`Occurrence records` = trimws(format(`Occurrence records`, nsmall=0, big.mark="\u202F"),which ="left")) %>% -mutate(`Data citations` = trimws(format(`Data citations`, nsmall=0, big.mark="\u202F"),which ="left")) %>% -mutate(`Activity sector` = paste0("{",`Activity sector`,"}")) %>% -mutate(`Country` = paste0("{",`Country`,"}")) %>% -write.table(file = "250-private-sector-table-test.csv", row.names = FALSE, col.names = FALSE, sep = ",", quote = FALSE) diff --git a/.history/build-table-script/R/table_20240704123814.R b/.history/build-table-script/R/table_20240704123814.R deleted file mode 100644 index f737597..0000000 --- a/.history/build-table-script/R/table_20240704123814.R +++ /dev/null @@ -1,101 +0,0 @@ - -library(dplyr) -library(purrr) -library(gbifmt) # my library -# setwd("C:/Users/ftw712/Desktop/doc-private-sector-data-publishing/") - -# harvest publishers and datasets private sector publishers directly from machineTags - -# dataset machine tags -ds_mt = get_mt("privateSector.gbif.org",type="dataset",limit=500) |> -mutate(link = paste0("https://www.gbif.org/dataset/",uuid)) %>% -mutate(pd = "dataset") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -# publisher machine tags -pb_mt = get_mt("privateSector.gbif.org",type="organization",limit=500) |> -mutate(link = paste0("https://www.gbif.org/publisher/",uuid)) %>% -mutate(pd = "publisher") |> -select(link, `Activity sector` = value, pd, key = uuid) |> -glimpse() - -# combine -ss = list(ds_mt,pb_mt) |> -bind_rows() |> -glimpse() - -gbif_country = rgbif::enumeration_country() %>% select(Country=title,iso2) %>% glimpse() - -pp = ss %>% -dplyr::filter(pd == "publisher") %>% -select(key,`Activity sector`) |> -mutate(name = map_chr(key,~rgbif::organizations(uuid=.x,limit=1)$data$title)) |> -mutate(`Occurrence records` = map_dbl(key,~ rgbif::occ_search(publishingOrg = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = map_dbl(key,~rgbif::dataset_search(publishingOrg= .x,limit=0)$meta$count)) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(publishingOrg = .x))) %>% -mutate(Company = paste0("https://www.gbif.org/publisher/",key,"[",name,"]")) %>% -mutate(iso2 = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -glimpse() - -dd = ss %>% -dplyr::filter(pd == "dataset") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::dataset_get(uuid=.x)$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ -rgbif::occ_search(datasetKey = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = 1) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(datasetKey = .x))) %>% -mutate(Company = paste0("(https://www.gbif.org/dataset/",key,")[",name,"]")) %>% -mutate(p_key = map_chr(key,~ rgbif::dataset_get(uuid=.x)$publishingOrganizationKey)) %>% -mutate(iso2 = map_chr(p_key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -select(-p_key) %>% -glimpse() - -# combine tables and clean up -tt = rbind(pp,dd) %>% -arrange(name) %>% -select(Company, `Activity sector`, Country = iso2, Datasets, `Occurrence records`, `Data citations`) - - -# "https://www.gbif.org/publisher/f2429cd1-4d45-475c-852a-892024cb4aba[ARC - Arctic Research and Consulting DA]",{CONSULTING},{NO},1,8 914,63 - -# save as csv -tt %>% -mutate(`Datasets` = trimws(format(`Datasets`, nsmall=0, big.mark="\u202F"),which ="left")) %>% -mutate(`Occurrence records` = trimws(format(`Occurrence records`, nsmall=0, big.mark="\u202F"),which ="left")) %>% -mutate(`Data citations` = trimws(format(`Data citations`, nsmall=0, big.mark="\u202F"),which ="left")) %>% -mutate(`Activity sector` = paste0("{",`Activity sector`,"}")) %>% -mutate(`Country` = paste0("{",`Country`,"}")) %>% -write.table(file = "250-private-sector-table-test.csv", row.names = FALSE, col.names = FALSE, sep = ",", quote = FALSE) - - -# save .adoc -# save_file_table = "250-private-sector-table.adoc" - -# save and more cleanup -# sink(file = save_file_table, type = "output") -# tt %>% -# mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -# mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -# mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -# ascii::ascii(include.rownames = FALSE, digits = 0) -# sink() - -# totals table -# save_file_totals = "260-private-sector-totals.adoc" - -# sink(file = save_file_totals, type = "output") -# tt %>% -# summarise( -# Datasets = sum(Datasets), -# `Occurrence records` = sum(`Occurrence records`), -# `Data citations` = sum(`Data citations`) -# ) %>% -# mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -# mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -# mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -# ascii::ascii(include.rownames = FALSE, digits = 0) -# sink() diff --git a/.history/build-table-script/R/table_20240704123846.R b/.history/build-table-script/R/table_20240704123846.R deleted file mode 100644 index 73de10c..0000000 --- a/.history/build-table-script/R/table_20240704123846.R +++ /dev/null @@ -1,101 +0,0 @@ - -library(dplyr) -library(purrr) -library(gbifmt) # my library -# setwd("C:/Users/ftw712/Desktop/doc-private-sector-data-publishing/") - -# harvest publishers and datasets private sector publishers directly from machineTags - -# dataset machine tags -ds_mt = get_mt("privateSector.gbif.org",type="dataset",limit=500) %>% -mutate(link = paste0("https://www.gbif.org/dataset/",uuid)) %>% -mutate(pd = "dataset") %>% -select(link, `Activity sector` = value, pd, key = uuid) %>% -glimpse() - -# publisher machine tags -pb_mt = get_mt("privateSector.gbif.org",type="organization",limit=500) %>% -mutate(link = paste0("https://www.gbif.org/publisher/",uuid)) %>% -mutate(pd = "publisher") %>% -select(link, `Activity sector` = value, pd, key = uuid) %>% -glimpse() - -# combine -ss = list(ds_mt,pb_mt) %>% -bind_rows() %>% -glimpse() - -gbif_country = rgbif::enumeration_country() %>% select(Country=title,iso2) %>% glimpse() - -pp = ss %>% -dplyr::filter(pd == "publisher") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::organizations(uuid=.x,limit=1)$data$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ rgbif::occ_search(publishingOrg = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = map_dbl(key,~rgbif::dataset_search(publishingOrg= .x,limit=0)$meta$count)) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(publishingOrg = .x))) %>% -mutate(Company = paste0("https://www.gbif.org/publisher/",key,"[",name,"]")) %>% -mutate(iso2 = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -glimpse() - -dd = ss %>% -dplyr::filter(pd == "dataset") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::dataset_get(uuid=.x)$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ -rgbif::occ_search(datasetKey = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = 1) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(datasetKey = .x))) %>% -mutate(Company = paste0("(https://www.gbif.org/dataset/",key,")[",name,"]")) %>% -mutate(p_key = map_chr(key,~ rgbif::dataset_get(uuid=.x)$publishingOrganizationKey)) %>% -mutate(iso2 = map_chr(p_key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -select(-p_key) %>% -glimpse() - -# combine tables and clean up -tt = rbind(pp,dd) %>% -arrange(name) %>% -select(Company, `Activity sector`, Country = iso2, Datasets, `Occurrence records`, `Data citations`) - - -# "https://www.gbif.org/publisher/f2429cd1-4d45-475c-852a-892024cb4aba[ARC - Arctic Research and Consulting DA]",{CONSULTING},{NO},1,8 914,63 - -# save as csv -tt %>% -mutate(`Datasets` = trimws(format(`Datasets`, nsmall=0, big.mark="\u202F"),which ="left")) %>% -mutate(`Occurrence records` = trimws(format(`Occurrence records`, nsmall=0, big.mark="\u202F"),which ="left")) %>% -mutate(`Data citations` = trimws(format(`Data citations`, nsmall=0, big.mark="\u202F"),which ="left")) %>% -mutate(`Activity sector` = paste0("{",`Activity sector`,"}")) %>% -mutate(`Country` = paste0("{",`Country`,"}")) %>% -write.table(file = "250-private-sector-table-test.csv", row.names = FALSE, col.names = FALSE, sep = ",", quote = FALSE) - - -# save .adoc -# save_file_table = "250-private-sector-table.adoc" - -# save and more cleanup -# sink(file = save_file_table, type = "output") -# tt %>% -# mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -# mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -# mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -# ascii::ascii(include.rownames = FALSE, digits = 0) -# sink() - -# totals table -# save_file_totals = "260-private-sector-totals.adoc" - -# sink(file = save_file_totals, type = "output") -# tt %>% -# summarise( -# Datasets = sum(Datasets), -# `Occurrence records` = sum(`Occurrence records`), -# `Data citations` = sum(`Data citations`) -# ) %>% -# mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -# mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -# mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -# ascii::ascii(include.rownames = FALSE, digits = 0) -# sink() diff --git a/.history/build-table-script/R/table_20240704131922.R b/.history/build-table-script/R/table_20240704131922.R deleted file mode 100644 index ae8d46c..0000000 --- a/.history/build-table-script/R/table_20240704131922.R +++ /dev/null @@ -1,96 +0,0 @@ - -library(dplyr) -library(purrr) -library(gbifmt) # my library -# setwd("C:/Users/ftw712/Desktop/doc-private-sector-data-publishing/") - -# harvest publishers and datasets private sector publishers directly from machineTags - -# dataset machine tags -ds_mt = get_mt("privateSector.gbif.org",type="dataset",limit=500) %>% -mutate(link = paste0("https://www.gbif.org/dataset/",uuid)) %>% -mutate(pd = "dataset") %>% -select(link, `Activity sector` = value, pd, key = uuid) %>% -glimpse() - -# publisher machine tags -pb_mt = get_mt("privateSector.gbif.org",type="organization",limit=500) %>% -mutate(link = paste0("https://www.gbif.org/publisher/",uuid)) %>% -mutate(pd = "publisher") %>% -select(link, `Activity sector` = value, pd, key = uuid) %>% -glimpse() - -# combine -ss = list(ds_mt,pb_mt) %>% -bind_rows() %>% -glimpse() - -gbif_country = rgbif::enumeration_country() %>% select(Country=title,iso2) %>% glimpse() - -pp = ss %>% -dplyr::filter(pd == "publisher") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::organizations(uuid=.x,limit=1)$data$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ rgbif::occ_search(publishingOrg = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = map_dbl(key,~rgbif::dataset_search(publishingOrg= .x,limit=0)$meta$count)) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(publishingOrg = .x))) %>% -mutate(Company = paste0("https://www.gbif.org/publisher/",key,"[",name,"]")) %>% -mutate(iso2 = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -glimpse() - -dd = ss %>% -dplyr::filter(pd == "dataset") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::dataset_get(uuid=.x)$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ -rgbif::occ_search(datasetKey = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = 1) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(datasetKey = .x))) %>% -mutate(Company = paste0("(https://www.gbif.org/dataset/",key,")[",name,"]")) %>% -mutate(p_key = map_chr(key,~ rgbif::dataset_get(uuid=.x)$publishingOrganizationKey)) %>% -mutate(iso2 = map_chr(p_key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -select(-p_key) %>% -glimpse() - -# combine tables and clean up -tt = rbind(pp,dd) %>% -arrange(name) %>% -select(Company, `Activity sector`, Country = iso2, Datasets, `Occurrence records`, `Data citations`) - - -# "https://www.gbif.org/publisher/f2429cd1-4d45-475c-852a-892024cb4aba[ARC - Arctic Research and Consulting DA]",{CONSULTING},{NO},1,8 914,63 - -# save as csv -tt %>% -mutate(`Datasets` = trimws(format(`Datasets`, nsmall=0, big.mark="\u202F"),which ="left")) %>% -mutate(`Occurrence records` = trimws(format(`Occurrence records`, nsmall=0, big.mark="\u202F"),which ="left")) %>% -mutate(`Data citations` = trimws(format(`Data citations`, nsmall=0, big.mark="\u202F"),which ="left")) %>% -mutate(`Activity sector` = paste0("{",`Activity sector`,"}")) %>% -mutate(`Country` = paste0("{",`Country`,"}")) %>% -write.table(file = "250-private-sector-table-test.csv", row.names = FALSE, col.names = FALSE, sep = ",", quote = FALSE) - -# totals table -tt %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -# mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -# mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -# ascii::ascii(include.rownames = FALSE, digits = 0) -# sink() - -# totals table -# save_file_totals = "260-private-sector-totals.adoc" - -# sink(file = save_file_totals, type = "output") -# tt %>% -# summarise( -# Datasets = sum(Datasets), -# `Occurrence records` = sum(`Occurrence records`), -# `Data citations` = sum(`Data citations`) -# ) %>% -# mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -# mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -# mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -# ascii::ascii(include.rownames = FALSE, digits = 0) -# sink() diff --git a/.history/build-table-script/R/table_20240704131936.R b/.history/build-table-script/R/table_20240704131936.R deleted file mode 100644 index d80a17e..0000000 --- a/.history/build-table-script/R/table_20240704131936.R +++ /dev/null @@ -1,96 +0,0 @@ - -library(dplyr) -library(purrr) -library(gbifmt) # my library -# setwd("C:/Users/ftw712/Desktop/doc-private-sector-data-publishing/") - -# harvest publishers and datasets private sector publishers directly from machineTags - -# dataset machine tags -ds_mt = get_mt("privateSector.gbif.org",type="dataset",limit=500) %>% -mutate(link = paste0("https://www.gbif.org/dataset/",uuid)) %>% -mutate(pd = "dataset") %>% -select(link, `Activity sector` = value, pd, key = uuid) %>% -glimpse() - -# publisher machine tags -pb_mt = get_mt("privateSector.gbif.org",type="organization",limit=500) %>% -mutate(link = paste0("https://www.gbif.org/publisher/",uuid)) %>% -mutate(pd = "publisher") %>% -select(link, `Activity sector` = value, pd, key = uuid) %>% -glimpse() - -# combine -ss = list(ds_mt,pb_mt) %>% -bind_rows() %>% -glimpse() - -gbif_country = rgbif::enumeration_country() %>% select(Country=title,iso2) %>% glimpse() - -pp = ss %>% -dplyr::filter(pd == "publisher") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::organizations(uuid=.x,limit=1)$data$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ rgbif::occ_search(publishingOrg = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = map_dbl(key,~rgbif::dataset_search(publishingOrg= .x,limit=0)$meta$count)) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(publishingOrg = .x))) %>% -mutate(Company = paste0("https://www.gbif.org/publisher/",key,"[",name,"]")) %>% -mutate(iso2 = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -glimpse() - -dd = ss %>% -dplyr::filter(pd == "dataset") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::dataset_get(uuid=.x)$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ -rgbif::occ_search(datasetKey = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = 1) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(datasetKey = .x))) %>% -mutate(Company = paste0("(https://www.gbif.org/dataset/",key,")[",name,"]")) %>% -mutate(p_key = map_chr(key,~ rgbif::dataset_get(uuid=.x)$publishingOrganizationKey)) %>% -mutate(iso2 = map_chr(p_key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -select(-p_key) %>% -glimpse() - -# combine tables and clean up -tt = rbind(pp,dd) %>% -arrange(name) %>% -select(Company, `Activity sector`, Country = iso2, Datasets, `Occurrence records`, `Data citations`) - - -# "https://www.gbif.org/publisher/f2429cd1-4d45-475c-852a-892024cb4aba[ARC - Arctic Research and Consulting DA]",{CONSULTING},{NO},1,8 914,63 - -# save csv and clean up -tt %>% -mutate(`Datasets` = trimws(format(`Datasets`, nsmall=0, big.mark="\u202F"),which ="left")) %>% -mutate(`Occurrence records` = trimws(format(`Occurrence records`, nsmall=0, big.mark="\u202F"),which ="left")) %>% -mutate(`Data citations` = trimws(format(`Data citations`, nsmall=0, big.mark="\u202F"),which ="left")) %>% -mutate(`Activity sector` = paste0("{",`Activity sector`,"}")) %>% -mutate(`Country` = paste0("{",`Country`,"}")) %>% -write.table(file = "250-private-sector-table-test.csv", row.names = FALSE, col.names = FALSE, sep = ",", quote = FALSE) - -# totals table -tt %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -# mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -# mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -# ascii::ascii(include.rownames = FALSE, digits = 0) -# sink() - -# totals table -# save_file_totals = "260-private-sector-totals.adoc" - -# sink(file = save_file_totals, type = "output") -# tt %>% -# summarise( -# Datasets = sum(Datasets), -# `Occurrence records` = sum(`Occurrence records`), -# `Data citations` = sum(`Data citations`) -# ) %>% -# mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -# mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -# mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -# ascii::ascii(include.rownames = FALSE, digits = 0) -# sink() diff --git a/.history/build-table-script/R/table_20240704131944.R b/.history/build-table-script/R/table_20240704131944.R deleted file mode 100644 index 02e9d05..0000000 --- a/.history/build-table-script/R/table_20240704131944.R +++ /dev/null @@ -1,96 +0,0 @@ - -library(dplyr) -library(purrr) -library(gbifmt) # my library -# setwd("C:/Users/ftw712/Desktop/doc-private-sector-data-publishing/") - -# harvest publishers and datasets private sector publishers directly from machineTags - -# dataset machine tags -ds_mt = get_mt("privateSector.gbif.org",type="dataset",limit=500) %>% -mutate(link = paste0("https://www.gbif.org/dataset/",uuid)) %>% -mutate(pd = "dataset") %>% -select(link, `Activity sector` = value, pd, key = uuid) %>% -glimpse() - -# publisher machine tags -pb_mt = get_mt("privateSector.gbif.org",type="organization",limit=500) %>% -mutate(link = paste0("https://www.gbif.org/publisher/",uuid)) %>% -mutate(pd = "publisher") %>% -select(link, `Activity sector` = value, pd, key = uuid) %>% -glimpse() - -# combine -ss = list(ds_mt,pb_mt) %>% -bind_rows() %>% -glimpse() - -gbif_country = rgbif::enumeration_country() %>% select(Country=title,iso2) %>% glimpse() - -pp = ss %>% -dplyr::filter(pd == "publisher") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::organizations(uuid=.x,limit=1)$data$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ rgbif::occ_search(publishingOrg = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = map_dbl(key,~rgbif::dataset_search(publishingOrg= .x,limit=0)$meta$count)) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(publishingOrg = .x))) %>% -mutate(Company = paste0("https://www.gbif.org/publisher/",key,"[",name,"]")) %>% -mutate(iso2 = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -glimpse() - -dd = ss %>% -dplyr::filter(pd == "dataset") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::dataset_get(uuid=.x)$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ -rgbif::occ_search(datasetKey = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = 1) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(datasetKey = .x))) %>% -mutate(Company = paste0("(https://www.gbif.org/dataset/",key,")[",name,"]")) %>% -mutate(p_key = map_chr(key,~ rgbif::dataset_get(uuid=.x)$publishingOrganizationKey)) %>% -mutate(iso2 = map_chr(p_key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -select(-p_key) %>% -glimpse() - -# combine tables -tt = rbind(pp,dd) %>% -arrange(name) %>% -select(Company, `Activity sector`, Country = iso2, Datasets, `Occurrence records`, `Data citations`) - - -# "https://www.gbif.org/publisher/f2429cd1-4d45-475c-852a-892024cb4aba[ARC - Arctic Research and Consulting DA]",{CONSULTING},{NO},1,8 914,63 - -# save csv and clean up -tt %>% -mutate(`Datasets` = trimws(format(`Datasets`, nsmall=0, big.mark="\u202F"),which ="left")) %>% -mutate(`Occurrence records` = trimws(format(`Occurrence records`, nsmall=0, big.mark="\u202F"),which ="left")) %>% -mutate(`Data citations` = trimws(format(`Data citations`, nsmall=0, big.mark="\u202F"),which ="left")) %>% -mutate(`Activity sector` = paste0("{",`Activity sector`,"}")) %>% -mutate(`Country` = paste0("{",`Country`,"}")) %>% -write.table(file = "250-private-sector-table-test.csv", row.names = FALSE, col.names = FALSE, sep = ",", quote = FALSE) - -# totals table -tt %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -# mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -# mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -# ascii::ascii(include.rownames = FALSE, digits = 0) -# sink() - -# totals table -# save_file_totals = "260-private-sector-totals.adoc" - -# sink(file = save_file_totals, type = "output") -# tt %>% -# summarise( -# Datasets = sum(Datasets), -# `Occurrence records` = sum(`Occurrence records`), -# `Data citations` = sum(`Data citations`) -# ) %>% -# mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -# mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -# mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -# ascii::ascii(include.rownames = FALSE, digits = 0) -# sink() diff --git a/.history/build-table-script/R/table_20240704131948.R b/.history/build-table-script/R/table_20240704131948.R deleted file mode 100644 index d2da307..0000000 --- a/.history/build-table-script/R/table_20240704131948.R +++ /dev/null @@ -1,93 +0,0 @@ - -library(dplyr) -library(purrr) -library(gbifmt) # my library -# setwd("C:/Users/ftw712/Desktop/doc-private-sector-data-publishing/") - -# harvest publishers and datasets private sector publishers directly from machineTags - -# dataset machine tags -ds_mt = get_mt("privateSector.gbif.org",type="dataset",limit=500) %>% -mutate(link = paste0("https://www.gbif.org/dataset/",uuid)) %>% -mutate(pd = "dataset") %>% -select(link, `Activity sector` = value, pd, key = uuid) %>% -glimpse() - -# publisher machine tags -pb_mt = get_mt("privateSector.gbif.org",type="organization",limit=500) %>% -mutate(link = paste0("https://www.gbif.org/publisher/",uuid)) %>% -mutate(pd = "publisher") %>% -select(link, `Activity sector` = value, pd, key = uuid) %>% -glimpse() - -# combine -ss = list(ds_mt,pb_mt) %>% -bind_rows() %>% -glimpse() - -gbif_country = rgbif::enumeration_country() %>% select(Country=title,iso2) %>% glimpse() - -pp = ss %>% -dplyr::filter(pd == "publisher") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::organizations(uuid=.x,limit=1)$data$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ rgbif::occ_search(publishingOrg = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = map_dbl(key,~rgbif::dataset_search(publishingOrg= .x,limit=0)$meta$count)) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(publishingOrg = .x))) %>% -mutate(Company = paste0("https://www.gbif.org/publisher/",key,"[",name,"]")) %>% -mutate(iso2 = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -glimpse() - -dd = ss %>% -dplyr::filter(pd == "dataset") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::dataset_get(uuid=.x)$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ -rgbif::occ_search(datasetKey = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = 1) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(datasetKey = .x))) %>% -mutate(Company = paste0("(https://www.gbif.org/dataset/",key,")[",name,"]")) %>% -mutate(p_key = map_chr(key,~ rgbif::dataset_get(uuid=.x)$publishingOrganizationKey)) %>% -mutate(iso2 = map_chr(p_key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -select(-p_key) %>% -glimpse() - -# combine tables -tt = rbind(pp,dd) %>% -arrange(name) %>% -select(Company, `Activity sector`, Country = iso2, Datasets, `Occurrence records`, `Data citations`) - -# save csv and clean up -tt %>% -mutate(`Datasets` = trimws(format(`Datasets`, nsmall=0, big.mark="\u202F"),which ="left")) %>% -mutate(`Occurrence records` = trimws(format(`Occurrence records`, nsmall=0, big.mark="\u202F"),which ="left")) %>% -mutate(`Data citations` = trimws(format(`Data citations`, nsmall=0, big.mark="\u202F"),which ="left")) %>% -mutate(`Activity sector` = paste0("{",`Activity sector`,"}")) %>% -mutate(`Country` = paste0("{",`Country`,"}")) %>% -write.table(file = "250-private-sector-table-test.csv", row.names = FALSE, col.names = FALSE, sep = ",", quote = FALSE) - -# totals table -tt %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -# mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -# mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -# ascii::ascii(include.rownames = FALSE, digits = 0) -# sink() - -# totals table -# save_file_totals = "260-private-sector-totals.adoc" - -# sink(file = save_file_totals, type = "output") -# tt %>% -# summarise( -# Datasets = sum(Datasets), -# `Occurrence records` = sum(`Occurrence records`), -# `Data citations` = sum(`Data citations`) -# ) %>% -# mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -# mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -# mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -# ascii::ascii(include.rownames = FALSE, digits = 0) -# sink() diff --git a/.history/build-table-script/R/table_20240704132011.R b/.history/build-table-script/R/table_20240704132011.R deleted file mode 100644 index 06d8d8e..0000000 --- a/.history/build-table-script/R/table_20240704132011.R +++ /dev/null @@ -1,93 +0,0 @@ - -library(dplyr) -library(purrr) -library(gbifmt) # my library -# setwd("C:/Users/ftw712/Desktop/doc-private-sector-data-publishing/") - -# harvest publishers and datasets private sector publishers directly from machineTags - -# dataset machine tags -ds_mt = get_mt("privateSector.gbif.org",type="dataset",limit=500) %>% -mutate(link = paste0("https://www.gbif.org/dataset/",uuid)) %>% -mutate(pd = "dataset") %>% -select(link, `Activity sector` = value, pd, key = uuid) %>% -glimpse() - -# publisher machine tags -pb_mt = get_mt("privateSector.gbif.org",type="organization",limit=500) %>% -mutate(link = paste0("https://www.gbif.org/publisher/",uuid)) %>% -mutate(pd = "publisher") %>% -select(link, `Activity sector` = value, pd, key = uuid) %>% -glimpse() - -# combine -ss = list(ds_mt,pb_mt) %>% -bind_rows() %>% -glimpse() - -gbif_country = rgbif::enumeration_country() %>% select(Country=title,iso2) %>% glimpse() - -pp = ss %>% -dplyr::filter(pd == "publisher") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::organizations(uuid=.x,limit=1)$data$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ rgbif::occ_search(publishingOrg = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = map_dbl(key,~rgbif::dataset_search(publishingOrg= .x,limit=0)$meta$count)) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(publishingOrg = .x))) %>% -mutate(Company = paste0("https://www.gbif.org/publisher/",key,"[",name,"]")) %>% -mutate(iso2 = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -glimpse() - -dd = ss %>% -dplyr::filter(pd == "dataset") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::dataset_get(uuid=.x)$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ -rgbif::occ_search(datasetKey = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = 1) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(datasetKey = .x))) %>% -mutate(Company = paste0("(https://www.gbif.org/dataset/",key,")[",name,"]")) %>% -mutate(p_key = map_chr(key,~ rgbif::dataset_get(uuid=.x)$publishingOrganizationKey)) %>% -mutate(iso2 = map_chr(p_key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -select(-p_key) %>% -glimpse() - -# combine tables -tt = rbind(pp,dd) %>% -arrange(name) %>% -select(Company, `Activity sector`, Country = iso2, Datasets, `Occurrence records`, `Data citations`) - -# save csv and clean up -tt %>% -mutate(`Datasets` = trimws(format(`Datasets`, nsmall=0, big.mark="\u202F"),which ="left")) %>% -mutate(`Occurrence records` = trimws(format(`Occurrence records`, nsmall=0, big.mark="\u202F"),which ="left")) %>% -mutate(`Data citations` = trimws(format(`Data citations`, nsmall=0, big.mark="\u202F"),which ="left")) %>% -mutate(`Activity sector` = paste0("{",`Activity sector`,"}")) %>% -mutate(`Country` = paste0("{",`Country`,"}")) %>% -write.table(file = "250-private-sector-table-test.csv", row.names = FALSE, col.names = FALSE, sep = ",", quote = FALSE) - -# totals table -tt %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark="\u202F")) %>% -# mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -# mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -# ascii::ascii(include.rownames = FALSE, digits = 0) -# sink() - -# totals table -# save_file_totals = "260-private-sector-totals.adoc" - -# sink(file = save_file_totals, type = "output") -# tt %>% -# summarise( -# Datasets = sum(Datasets), -# `Occurrence records` = sum(`Occurrence records`), -# `Data citations` = sum(`Data citations`) -# ) %>% -# mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -# mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -# mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -# ascii::ascii(include.rownames = FALSE, digits = 0) -# sink() diff --git a/.history/build-table-script/R/table_20240704132020.R b/.history/build-table-script/R/table_20240704132020.R deleted file mode 100644 index dbf1de9..0000000 --- a/.history/build-table-script/R/table_20240704132020.R +++ /dev/null @@ -1,93 +0,0 @@ - -library(dplyr) -library(purrr) -library(gbifmt) # my library -# setwd("C:/Users/ftw712/Desktop/doc-private-sector-data-publishing/") - -# harvest publishers and datasets private sector publishers directly from machineTags - -# dataset machine tags -ds_mt = get_mt("privateSector.gbif.org",type="dataset",limit=500) %>% -mutate(link = paste0("https://www.gbif.org/dataset/",uuid)) %>% -mutate(pd = "dataset") %>% -select(link, `Activity sector` = value, pd, key = uuid) %>% -glimpse() - -# publisher machine tags -pb_mt = get_mt("privateSector.gbif.org",type="organization",limit=500) %>% -mutate(link = paste0("https://www.gbif.org/publisher/",uuid)) %>% -mutate(pd = "publisher") %>% -select(link, `Activity sector` = value, pd, key = uuid) %>% -glimpse() - -# combine -ss = list(ds_mt,pb_mt) %>% -bind_rows() %>% -glimpse() - -gbif_country = rgbif::enumeration_country() %>% select(Country=title,iso2) %>% glimpse() - -pp = ss %>% -dplyr::filter(pd == "publisher") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::organizations(uuid=.x,limit=1)$data$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ rgbif::occ_search(publishingOrg = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = map_dbl(key,~rgbif::dataset_search(publishingOrg= .x,limit=0)$meta$count)) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(publishingOrg = .x))) %>% -mutate(Company = paste0("https://www.gbif.org/publisher/",key,"[",name,"]")) %>% -mutate(iso2 = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -glimpse() - -dd = ss %>% -dplyr::filter(pd == "dataset") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::dataset_get(uuid=.x)$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ -rgbif::occ_search(datasetKey = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = 1) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(datasetKey = .x))) %>% -mutate(Company = paste0("(https://www.gbif.org/dataset/",key,")[",name,"]")) %>% -mutate(p_key = map_chr(key,~ rgbif::dataset_get(uuid=.x)$publishingOrganizationKey)) %>% -mutate(iso2 = map_chr(p_key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -select(-p_key) %>% -glimpse() - -# combine tables -tt = rbind(pp,dd) %>% -arrange(name) %>% -select(Company, `Activity sector`, Country = iso2, Datasets, `Occurrence records`, `Data citations`) - -# save csv and clean up -tt %>% -mutate(`Datasets` = trimws(format(`Datasets`, nsmall=0, big.mark="\u202F"),which ="left")) %>% -mutate(`Occurrence records` = trimws(format(`Occurrence records`, nsmall=0, big.mark="\u202F"),which ="left")) %>% -mutate(`Data citations` = trimws(format(`Data citations`, nsmall=0, big.mark="\u202F"),which ="left")) %>% -mutate(`Activity sector` = paste0("{",`Activity sector`,"}")) %>% -mutate(`Country` = paste0("{",`Country`,"}")) %>% -write.table(file = "250-private-sector-table-test.csv", row.names = FALSE, col.names = FALSE, sep = ",", quote = FALSE) - -# totals table -tt %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark="\u202F")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark="\u202F")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark="\u202F")) %>% -# ascii::ascii(include.rownames = FALSE, digits = 0) -# sink() - -# totals table -# save_file_totals = "260-private-sector-totals.adoc" - -# sink(file = save_file_totals, type = "output") -# tt %>% -# summarise( -# Datasets = sum(Datasets), -# `Occurrence records` = sum(`Occurrence records`), -# `Data citations` = sum(`Data citations`) -# ) %>% -# mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -# mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -# mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -# ascii::ascii(include.rownames = FALSE, digits = 0) -# sink() diff --git a/.history/build-table-script/R/table_20240704132616.R b/.history/build-table-script/R/table_20240704132616.R deleted file mode 100644 index b9ec59e..0000000 --- a/.history/build-table-script/R/table_20240704132616.R +++ /dev/null @@ -1,93 +0,0 @@ - -library(dplyr) -library(purrr) -library(gbifmt) # my library -# setwd("C:/Users/ftw712/Desktop/doc-private-sector-data-publishing/") - -# harvest publishers and datasets private sector publishers directly from machineTags - -# dataset machine tags -ds_mt = get_mt("privateSector.gbif.org",type="dataset",limit=500) %>% -mutate(link = paste0("https://www.gbif.org/dataset/",uuid)) %>% -mutate(pd = "dataset") %>% -select(link, `Activity sector` = value, pd, key = uuid) %>% -glimpse() - -# publisher machine tags -pb_mt = get_mt("privateSector.gbif.org",type="organization",limit=500) %>% -mutate(link = paste0("https://www.gbif.org/publisher/",uuid)) %>% -mutate(pd = "publisher") %>% -select(link, `Activity sector` = value, pd, key = uuid) %>% -glimpse() - -# combine -ss = list(ds_mt,pb_mt) %>% -bind_rows() %>% -glimpse() - -gbif_country = rgbif::enumeration_country() %>% select(Country=title,iso2) %>% glimpse() - -pp = ss %>% -dplyr::filter(pd == "publisher") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::organizations(uuid=.x,limit=1)$data$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ rgbif::occ_search(publishingOrg = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = map_dbl(key,~rgbif::dataset_search(publishingOrg= .x,limit=0)$meta$count)) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(publishingOrg = .x))) %>% -mutate(Company = paste0("https://www.gbif.org/publisher/",key,"[",name,"]")) %>% -mutate(iso2 = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -glimpse() - -dd = ss %>% -dplyr::filter(pd == "dataset") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::dataset_get(uuid=.x)$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ -rgbif::occ_search(datasetKey = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = 1) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(datasetKey = .x))) %>% -mutate(Company = paste0("(https://www.gbif.org/dataset/",key,")[",name,"]")) %>% -mutate(p_key = map_chr(key,~ rgbif::dataset_get(uuid=.x)$publishingOrganizationKey)) %>% -mutate(iso2 = map_chr(p_key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -select(-p_key) %>% -glimpse() - -# combine tables -tt = rbind(pp,dd) %>% -arrange(name) %>% -select(Company, `Activity sector`, Country = iso2, Datasets, `Occurrence records`, `Data citations`) - -# save csv and clean up -tt %>% -mutate(`Datasets` = trimws(format(`Datasets`, nsmall=0, big.mark="\u202F"),which ="left")) %>% -mutate(`Occurrence records` = trimws(format(`Occurrence records`, nsmall=0, big.mark="\u202F"),which ="left")) %>% -mutate(`Data citations` = trimws(format(`Data citations`, nsmall=0, big.mark="\u202F"),which ="left")) %>% -mutate(`Activity sector` = paste0("{",`Activity sector`,"}")) %>% -mutate(`Country` = paste0("{",`Country`,"}")) %>% -write.table(file = "250-private-sector-table-test.csv", row.names = FALSE, col.names = FALSE, sep = ",", quote = FALSE) - -# totals table -tt %>% -mutate(`Datasets` = trimws(format(`Datasets`, nsmall=0, big.mark="\u202F"),which ="left")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark="\u202F")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark="\u202F")) %>% -# ascii::ascii(include.rownames = FALSE, digits = 0) -# sink() - -# totals table -# save_file_totals = "260-private-sector-totals.adoc" - -# sink(file = save_file_totals, type = "output") -# tt %>% -# summarise( -# Datasets = sum(Datasets), -# `Occurrence records` = sum(`Occurrence records`), -# `Data citations` = sum(`Data citations`) -# ) %>% -# mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -# mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -# mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -# ascii::ascii(include.rownames = FALSE, digits = 0) -# sink() diff --git a/.history/build-table-script/R/table_20240704132618.R b/.history/build-table-script/R/table_20240704132618.R deleted file mode 100644 index b9ec59e..0000000 --- a/.history/build-table-script/R/table_20240704132618.R +++ /dev/null @@ -1,93 +0,0 @@ - -library(dplyr) -library(purrr) -library(gbifmt) # my library -# setwd("C:/Users/ftw712/Desktop/doc-private-sector-data-publishing/") - -# harvest publishers and datasets private sector publishers directly from machineTags - -# dataset machine tags -ds_mt = get_mt("privateSector.gbif.org",type="dataset",limit=500) %>% -mutate(link = paste0("https://www.gbif.org/dataset/",uuid)) %>% -mutate(pd = "dataset") %>% -select(link, `Activity sector` = value, pd, key = uuid) %>% -glimpse() - -# publisher machine tags -pb_mt = get_mt("privateSector.gbif.org",type="organization",limit=500) %>% -mutate(link = paste0("https://www.gbif.org/publisher/",uuid)) %>% -mutate(pd = "publisher") %>% -select(link, `Activity sector` = value, pd, key = uuid) %>% -glimpse() - -# combine -ss = list(ds_mt,pb_mt) %>% -bind_rows() %>% -glimpse() - -gbif_country = rgbif::enumeration_country() %>% select(Country=title,iso2) %>% glimpse() - -pp = ss %>% -dplyr::filter(pd == "publisher") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::organizations(uuid=.x,limit=1)$data$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ rgbif::occ_search(publishingOrg = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = map_dbl(key,~rgbif::dataset_search(publishingOrg= .x,limit=0)$meta$count)) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(publishingOrg = .x))) %>% -mutate(Company = paste0("https://www.gbif.org/publisher/",key,"[",name,"]")) %>% -mutate(iso2 = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -glimpse() - -dd = ss %>% -dplyr::filter(pd == "dataset") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::dataset_get(uuid=.x)$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ -rgbif::occ_search(datasetKey = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = 1) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(datasetKey = .x))) %>% -mutate(Company = paste0("(https://www.gbif.org/dataset/",key,")[",name,"]")) %>% -mutate(p_key = map_chr(key,~ rgbif::dataset_get(uuid=.x)$publishingOrganizationKey)) %>% -mutate(iso2 = map_chr(p_key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -select(-p_key) %>% -glimpse() - -# combine tables -tt = rbind(pp,dd) %>% -arrange(name) %>% -select(Company, `Activity sector`, Country = iso2, Datasets, `Occurrence records`, `Data citations`) - -# save csv and clean up -tt %>% -mutate(`Datasets` = trimws(format(`Datasets`, nsmall=0, big.mark="\u202F"),which ="left")) %>% -mutate(`Occurrence records` = trimws(format(`Occurrence records`, nsmall=0, big.mark="\u202F"),which ="left")) %>% -mutate(`Data citations` = trimws(format(`Data citations`, nsmall=0, big.mark="\u202F"),which ="left")) %>% -mutate(`Activity sector` = paste0("{",`Activity sector`,"}")) %>% -mutate(`Country` = paste0("{",`Country`,"}")) %>% -write.table(file = "250-private-sector-table-test.csv", row.names = FALSE, col.names = FALSE, sep = ",", quote = FALSE) - -# totals table -tt %>% -mutate(`Datasets` = trimws(format(`Datasets`, nsmall=0, big.mark="\u202F"),which ="left")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark="\u202F")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark="\u202F")) %>% -# ascii::ascii(include.rownames = FALSE, digits = 0) -# sink() - -# totals table -# save_file_totals = "260-private-sector-totals.adoc" - -# sink(file = save_file_totals, type = "output") -# tt %>% -# summarise( -# Datasets = sum(Datasets), -# `Occurrence records` = sum(`Occurrence records`), -# `Data citations` = sum(`Data citations`) -# ) %>% -# mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -# mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -# mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -# ascii::ascii(include.rownames = FALSE, digits = 0) -# sink() diff --git a/.history/build-table-script/R/table_20240704132737.R b/.history/build-table-script/R/table_20240704132737.R deleted file mode 100644 index e334a1a..0000000 --- a/.history/build-table-script/R/table_20240704132737.R +++ /dev/null @@ -1,89 +0,0 @@ - -library(dplyr) -library(purrr) -library(gbifmt) # my library -# setwd("C:/Users/ftw712/Desktop/doc-private-sector-data-publishing/") - -# harvest publishers and datasets private sector publishers directly from machineTags - -# dataset machine tags -ds_mt = get_mt("privateSector.gbif.org",type="dataset",limit=500) %>% -mutate(link = paste0("https://www.gbif.org/dataset/",uuid)) %>% -mutate(pd = "dataset") %>% -select(link, `Activity sector` = value, pd, key = uuid) %>% -glimpse() - -# publisher machine tags -pb_mt = get_mt("privateSector.gbif.org",type="organization",limit=500) %>% -mutate(link = paste0("https://www.gbif.org/publisher/",uuid)) %>% -mutate(pd = "publisher") %>% -select(link, `Activity sector` = value, pd, key = uuid) %>% -glimpse() - -# combine -ss = list(ds_mt,pb_mt) %>% -bind_rows() %>% -glimpse() - -gbif_country = rgbif::enumeration_country() %>% select(Country=title,iso2) %>% glimpse() - -pp = ss %>% -dplyr::filter(pd == "publisher") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::organizations(uuid=.x,limit=1)$data$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ rgbif::occ_search(publishingOrg = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = map_dbl(key,~rgbif::dataset_search(publishingOrg= .x,limit=0)$meta$count)) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(publishingOrg = .x))) %>% -mutate(Company = paste0("https://www.gbif.org/publisher/",key,"[",name,"]")) %>% -mutate(iso2 = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -glimpse() - -dd = ss %>% -dplyr::filter(pd == "dataset") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::dataset_get(uuid=.x)$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ -rgbif::occ_search(datasetKey = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = 1) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(datasetKey = .x))) %>% -mutate(Company = paste0("(https://www.gbif.org/dataset/",key,")[",name,"]")) %>% -mutate(p_key = map_chr(key,~ rgbif::dataset_get(uuid=.x)$publishingOrganizationKey)) %>% -mutate(iso2 = map_chr(p_key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -select(-p_key) %>% -glimpse() - -# combine tables -tt = rbind(pp,dd) %>% -arrange(name) %>% -select(Company, `Activity sector`, Country = iso2, Datasets, `Occurrence records`, `Data citations`) - -# save csv and clean up -tt %>% -mutate(`Datasets` = trimws(format(`Datasets`, nsmall=0, big.mark="\u202F"),which ="left")) %>% -mutate(`Occurrence records` = trimws(format(`Occurrence records`, nsmall=0, big.mark="\u202F"),which ="left")) %>% -mutate(`Data citations` = trimws(format(`Data citations`, nsmall=0, big.mark="\u202F"),which ="left")) %>% -mutate(`Activity sector` = paste0("{",`Activity sector`,"}")) %>% -mutate(`Country` = paste0("{",`Country`,"}")) %>% -write.table(file = "250-private-sector-table-test.csv", row.names = FALSE, col.names = FALSE, sep = ",", quote = FALSE) - -# totals table -tt %>% -mutate(`Datasets` = trimws(format(`Datasets`, nsmall=0, big.mark="\u202F"),which ="left"))) %>% -mutate(`Occurrence records` = trimws(format(`Occurrence records`, nsmall=0, big.mark="\u202F"),which ="left"))) %>% -mutate(`Data citations` = trimws(format(`Data citations`, nsmall=0, big.mark="\u202F"))) %>% - -# save_file_totals = "260-private-sector-totals.adoc" - -# sink(file = save_file_totals, type = "output") -# tt %>% -# summarise( -# Datasets = sum(Datasets), -# `Occurrence records` = sum(`Occurrence records`), -# `Data citations` = sum(`Data citations`) -# ) %>% -# mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -# mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -# mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% -# sink() diff --git a/.history/build-table-script/R/table_20240704132740.R b/.history/build-table-script/R/table_20240704132740.R deleted file mode 100644 index 7eff28e..0000000 --- a/.history/build-table-script/R/table_20240704132740.R +++ /dev/null @@ -1,88 +0,0 @@ - -library(dplyr) -library(purrr) -library(gbifmt) # my library -# setwd("C:/Users/ftw712/Desktop/doc-private-sector-data-publishing/") - -# harvest publishers and datasets private sector publishers directly from machineTags - -# dataset machine tags -ds_mt = get_mt("privateSector.gbif.org",type="dataset",limit=500) %>% -mutate(link = paste0("https://www.gbif.org/dataset/",uuid)) %>% -mutate(pd = "dataset") %>% -select(link, `Activity sector` = value, pd, key = uuid) %>% -glimpse() - -# publisher machine tags -pb_mt = get_mt("privateSector.gbif.org",type="organization",limit=500) %>% -mutate(link = paste0("https://www.gbif.org/publisher/",uuid)) %>% -mutate(pd = "publisher") %>% -select(link, `Activity sector` = value, pd, key = uuid) %>% -glimpse() - -# combine -ss = list(ds_mt,pb_mt) %>% -bind_rows() %>% -glimpse() - -gbif_country = rgbif::enumeration_country() %>% select(Country=title,iso2) %>% glimpse() - -pp = ss %>% -dplyr::filter(pd == "publisher") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::organizations(uuid=.x,limit=1)$data$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ rgbif::occ_search(publishingOrg = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = map_dbl(key,~rgbif::dataset_search(publishingOrg= .x,limit=0)$meta$count)) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(publishingOrg = .x))) %>% -mutate(Company = paste0("https://www.gbif.org/publisher/",key,"[",name,"]")) %>% -mutate(iso2 = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -glimpse() - -dd = ss %>% -dplyr::filter(pd == "dataset") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::dataset_get(uuid=.x)$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ -rgbif::occ_search(datasetKey = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = 1) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(datasetKey = .x))) %>% -mutate(Company = paste0("(https://www.gbif.org/dataset/",key,")[",name,"]")) %>% -mutate(p_key = map_chr(key,~ rgbif::dataset_get(uuid=.x)$publishingOrganizationKey)) %>% -mutate(iso2 = map_chr(p_key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -select(-p_key) %>% -glimpse() - -# combine tables -tt = rbind(pp,dd) %>% -arrange(name) %>% -select(Company, `Activity sector`, Country = iso2, Datasets, `Occurrence records`, `Data citations`) - -# save csv and clean up -tt %>% -mutate(`Datasets` = trimws(format(`Datasets`, nsmall=0, big.mark="\u202F"),which ="left")) %>% -mutate(`Occurrence records` = trimws(format(`Occurrence records`, nsmall=0, big.mark="\u202F"),which ="left")) %>% -mutate(`Data citations` = trimws(format(`Data citations`, nsmall=0, big.mark="\u202F"),which ="left")) %>% -mutate(`Activity sector` = paste0("{",`Activity sector`,"}")) %>% -mutate(`Country` = paste0("{",`Country`,"}")) %>% -write.table(file = "250-private-sector-table-test.csv", row.names = FALSE, col.names = FALSE, sep = ",", quote = FALSE) - -# totals table -tt %>% -mutate(`Datasets` = trimws(format(`Datasets`, nsmall=0, big.mark="\u202F"),which ="left"))) %>% -mutate(`Occurrence records` = trimws(format(`Occurrence records`, nsmall=0, big.mark="\u202F"),which ="left"))) %>% -mutate(`Data citations` = trimws(format(`Data citations`, nsmall=0, big.mark="\u202F"))) %>% - -# save_file_totals = "260-private-sector-totals.adoc" - -# sink(file = save_file_totals, type = "output") -# tt %>% -# summarise( -# Datasets = sum(Datasets), -# `Occurrence records` = sum(`Occurrence records`), -# `Data citations` = sum(`Data citations`) -# ) %>% -# mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -# mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -# mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% diff --git a/.history/build-table-script/R/table_20240704132746.R b/.history/build-table-script/R/table_20240704132746.R deleted file mode 100644 index d7728d6..0000000 --- a/.history/build-table-script/R/table_20240704132746.R +++ /dev/null @@ -1,87 +0,0 @@ - -library(dplyr) -library(purrr) -library(gbifmt) # my library -# setwd("C:/Users/ftw712/Desktop/doc-private-sector-data-publishing/") - -# harvest publishers and datasets private sector publishers directly from machineTags - -# dataset machine tags -ds_mt = get_mt("privateSector.gbif.org",type="dataset",limit=500) %>% -mutate(link = paste0("https://www.gbif.org/dataset/",uuid)) %>% -mutate(pd = "dataset") %>% -select(link, `Activity sector` = value, pd, key = uuid) %>% -glimpse() - -# publisher machine tags -pb_mt = get_mt("privateSector.gbif.org",type="organization",limit=500) %>% -mutate(link = paste0("https://www.gbif.org/publisher/",uuid)) %>% -mutate(pd = "publisher") %>% -select(link, `Activity sector` = value, pd, key = uuid) %>% -glimpse() - -# combine -ss = list(ds_mt,pb_mt) %>% -bind_rows() %>% -glimpse() - -gbif_country = rgbif::enumeration_country() %>% select(Country=title,iso2) %>% glimpse() - -pp = ss %>% -dplyr::filter(pd == "publisher") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::organizations(uuid=.x,limit=1)$data$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ rgbif::occ_search(publishingOrg = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = map_dbl(key,~rgbif::dataset_search(publishingOrg= .x,limit=0)$meta$count)) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(publishingOrg = .x))) %>% -mutate(Company = paste0("https://www.gbif.org/publisher/",key,"[",name,"]")) %>% -mutate(iso2 = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -glimpse() - -dd = ss %>% -dplyr::filter(pd == "dataset") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::dataset_get(uuid=.x)$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ -rgbif::occ_search(datasetKey = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = 1) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(datasetKey = .x))) %>% -mutate(Company = paste0("(https://www.gbif.org/dataset/",key,")[",name,"]")) %>% -mutate(p_key = map_chr(key,~ rgbif::dataset_get(uuid=.x)$publishingOrganizationKey)) %>% -mutate(iso2 = map_chr(p_key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -select(-p_key) %>% -glimpse() - -# combine tables -tt = rbind(pp,dd) %>% -arrange(name) %>% -select(Company, `Activity sector`, Country = iso2, Datasets, `Occurrence records`, `Data citations`) - -# save csv and clean up -tt %>% -mutate(`Datasets` = trimws(format(`Datasets`, nsmall=0, big.mark="\u202F"),which ="left")) %>% -mutate(`Occurrence records` = trimws(format(`Occurrence records`, nsmall=0, big.mark="\u202F"),which ="left")) %>% -mutate(`Data citations` = trimws(format(`Data citations`, nsmall=0, big.mark="\u202F"),which ="left")) %>% -mutate(`Activity sector` = paste0("{",`Activity sector`,"}")) %>% -mutate(`Country` = paste0("{",`Country`,"}")) %>% -write.table(file = "250-private-sector-table-test.csv", row.names = FALSE, col.names = FALSE, sep = ",", quote = FALSE) - -# totals table -tt %>% -mutate(`Datasets` = trimws(format(`Datasets`, nsmall=0, big.mark="\u202F"),which ="left"))) %>% -mutate(`Occurrence records` = trimws(format(`Occurrence records`, nsmall=0, big.mark="\u202F"),which ="left"))) %>% -mutate(`Data citations` = trimws(format(`Data citations`, nsmall=0, big.mark="\u202F"))) %>% - -# save_file_totals = "260-private-sector-totals.adoc" - -# tt %>% -# summarise( -# Datasets = sum(Datasets), -# `Occurrence records` = sum(`Occurrence records`), -# `Data citations` = sum(`Data citations`) -# ) %>% -# mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -# mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -# mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% diff --git a/.history/build-table-script/R/table_20240704132808.R b/.history/build-table-script/R/table_20240704132808.R deleted file mode 100644 index c94fcc9..0000000 --- a/.history/build-table-script/R/table_20240704132808.R +++ /dev/null @@ -1,83 +0,0 @@ - -library(dplyr) -library(purrr) -library(gbifmt) # my library -# setwd("C:/Users/ftw712/Desktop/doc-private-sector-data-publishing/") - -# harvest publishers and datasets private sector publishers directly from machineTags - -# dataset machine tags -ds_mt = get_mt("privateSector.gbif.org",type="dataset",limit=500) %>% -mutate(link = paste0("https://www.gbif.org/dataset/",uuid)) %>% -mutate(pd = "dataset") %>% -select(link, `Activity sector` = value, pd, key = uuid) %>% -glimpse() - -# publisher machine tags -pb_mt = get_mt("privateSector.gbif.org",type="organization",limit=500) %>% -mutate(link = paste0("https://www.gbif.org/publisher/",uuid)) %>% -mutate(pd = "publisher") %>% -select(link, `Activity sector` = value, pd, key = uuid) %>% -glimpse() - -# combine -ss = list(ds_mt,pb_mt) %>% -bind_rows() %>% -glimpse() - -gbif_country = rgbif::enumeration_country() %>% select(Country=title,iso2) %>% glimpse() - -pp = ss %>% -dplyr::filter(pd == "publisher") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::organizations(uuid=.x,limit=1)$data$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ rgbif::occ_search(publishingOrg = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = map_dbl(key,~rgbif::dataset_search(publishingOrg= .x,limit=0)$meta$count)) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(publishingOrg = .x))) %>% -mutate(Company = paste0("https://www.gbif.org/publisher/",key,"[",name,"]")) %>% -mutate(iso2 = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -glimpse() - -dd = ss %>% -dplyr::filter(pd == "dataset") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::dataset_get(uuid=.x)$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ -rgbif::occ_search(datasetKey = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = 1) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(datasetKey = .x))) %>% -mutate(Company = paste0("(https://www.gbif.org/dataset/",key,")[",name,"]")) %>% -mutate(p_key = map_chr(key,~ rgbif::dataset_get(uuid=.x)$publishingOrganizationKey)) %>% -mutate(iso2 = map_chr(p_key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -select(-p_key) %>% -glimpse() - -# combine tables -tt = rbind(pp,dd) %>% -arrange(name) %>% -select(Company, `Activity sector`, Country = iso2, Datasets, `Occurrence records`, `Data citations`) - -# save csv and clean up -tt %>% -mutate(`Datasets` = trimws(format(`Datasets`, nsmall=0, big.mark="\u202F"),which ="left")) %>% -mutate(`Occurrence records` = trimws(format(`Occurrence records`, nsmall=0, big.mark="\u202F"),which ="left")) %>% -mutate(`Data citations` = trimws(format(`Data citations`, nsmall=0, big.mark="\u202F"),which ="left")) %>% -mutate(`Activity sector` = paste0("{",`Activity sector`,"}")) %>% -mutate(`Country` = paste0("{",`Country`,"}")) %>% -write.table(file = "250-private-sector-table-test.csv", row.names = FALSE, col.names = FALSE, sep = ",", quote = FALSE) - -# totals table - -# save_file_totals = "260-private-sector-totals.adoc" - -tt %>% -summarise( -Datasets = sum(Datasets), -`Occurrence records` = sum(`Occurrence records`), -`Data citations` = sum(`Data citations`) -) %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% diff --git a/.history/build-table-script/R/table_20240704132810.R b/.history/build-table-script/R/table_20240704132810.R deleted file mode 100644 index c94fcc9..0000000 --- a/.history/build-table-script/R/table_20240704132810.R +++ /dev/null @@ -1,83 +0,0 @@ - -library(dplyr) -library(purrr) -library(gbifmt) # my library -# setwd("C:/Users/ftw712/Desktop/doc-private-sector-data-publishing/") - -# harvest publishers and datasets private sector publishers directly from machineTags - -# dataset machine tags -ds_mt = get_mt("privateSector.gbif.org",type="dataset",limit=500) %>% -mutate(link = paste0("https://www.gbif.org/dataset/",uuid)) %>% -mutate(pd = "dataset") %>% -select(link, `Activity sector` = value, pd, key = uuid) %>% -glimpse() - -# publisher machine tags -pb_mt = get_mt("privateSector.gbif.org",type="organization",limit=500) %>% -mutate(link = paste0("https://www.gbif.org/publisher/",uuid)) %>% -mutate(pd = "publisher") %>% -select(link, `Activity sector` = value, pd, key = uuid) %>% -glimpse() - -# combine -ss = list(ds_mt,pb_mt) %>% -bind_rows() %>% -glimpse() - -gbif_country = rgbif::enumeration_country() %>% select(Country=title,iso2) %>% glimpse() - -pp = ss %>% -dplyr::filter(pd == "publisher") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::organizations(uuid=.x,limit=1)$data$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ rgbif::occ_search(publishingOrg = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = map_dbl(key,~rgbif::dataset_search(publishingOrg= .x,limit=0)$meta$count)) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(publishingOrg = .x))) %>% -mutate(Company = paste0("https://www.gbif.org/publisher/",key,"[",name,"]")) %>% -mutate(iso2 = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -glimpse() - -dd = ss %>% -dplyr::filter(pd == "dataset") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::dataset_get(uuid=.x)$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ -rgbif::occ_search(datasetKey = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = 1) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(datasetKey = .x))) %>% -mutate(Company = paste0("(https://www.gbif.org/dataset/",key,")[",name,"]")) %>% -mutate(p_key = map_chr(key,~ rgbif::dataset_get(uuid=.x)$publishingOrganizationKey)) %>% -mutate(iso2 = map_chr(p_key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -select(-p_key) %>% -glimpse() - -# combine tables -tt = rbind(pp,dd) %>% -arrange(name) %>% -select(Company, `Activity sector`, Country = iso2, Datasets, `Occurrence records`, `Data citations`) - -# save csv and clean up -tt %>% -mutate(`Datasets` = trimws(format(`Datasets`, nsmall=0, big.mark="\u202F"),which ="left")) %>% -mutate(`Occurrence records` = trimws(format(`Occurrence records`, nsmall=0, big.mark="\u202F"),which ="left")) %>% -mutate(`Data citations` = trimws(format(`Data citations`, nsmall=0, big.mark="\u202F"),which ="left")) %>% -mutate(`Activity sector` = paste0("{",`Activity sector`,"}")) %>% -mutate(`Country` = paste0("{",`Country`,"}")) %>% -write.table(file = "250-private-sector-table-test.csv", row.names = FALSE, col.names = FALSE, sep = ",", quote = FALSE) - -# totals table - -# save_file_totals = "260-private-sector-totals.adoc" - -tt %>% -summarise( -Datasets = sum(Datasets), -`Occurrence records` = sum(`Occurrence records`), -`Data citations` = sum(`Data citations`) -) %>% -mutate(`Datasets` = format(`Datasets`, nsmall=0, big.mark=",")) %>% -mutate(`Occurrence records` = format(`Occurrence records`, nsmall=0, big.mark=",")) %>% -mutate(`Data citations` = format(`Data citations`, nsmall=0, big.mark=",")) %>% diff --git a/.history/build-table-script/R/table_20240704133533.R b/.history/build-table-script/R/table_20240704133533.R deleted file mode 100644 index ef142b3..0000000 --- a/.history/build-table-script/R/table_20240704133533.R +++ /dev/null @@ -1,83 +0,0 @@ - -library(dplyr) -library(purrr) -library(gbifmt) # my library -# setwd("C:/Users/ftw712/Desktop/doc-private-sector-data-publishing/") - -# harvest publishers and datasets private sector publishers directly from machineTags - -# dataset machine tags -ds_mt = get_mt("privateSector.gbif.org",type="dataset",limit=500) %>% -mutate(link = paste0("https://www.gbif.org/dataset/",uuid)) %>% -mutate(pd = "dataset") %>% -select(link, `Activity sector` = value, pd, key = uuid) %>% -glimpse() - -# publisher machine tags -pb_mt = get_mt("privateSector.gbif.org",type="organization",limit=500) %>% -mutate(link = paste0("https://www.gbif.org/publisher/",uuid)) %>% -mutate(pd = "publisher") %>% -select(link, `Activity sector` = value, pd, key = uuid) %>% -glimpse() - -# combine -ss = list(ds_mt,pb_mt) %>% -bind_rows() %>% -glimpse() - -gbif_country = rgbif::enumeration_country() %>% select(Country=title,iso2) %>% glimpse() - -pp = ss %>% -dplyr::filter(pd == "publisher") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::organizations(uuid=.x,limit=1)$data$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ rgbif::occ_search(publishingOrg = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = map_dbl(key,~rgbif::dataset_search(publishingOrg= .x,limit=0)$meta$count)) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(publishingOrg = .x))) %>% -mutate(Company = paste0("https://www.gbif.org/publisher/",key,"[",name,"]")) %>% -mutate(iso2 = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -glimpse() - -dd = ss %>% -dplyr::filter(pd == "dataset") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::dataset_get(uuid=.x)$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ -rgbif::occ_search(datasetKey = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = 1) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(datasetKey = .x))) %>% -mutate(Company = paste0("(https://www.gbif.org/dataset/",key,")[",name,"]")) %>% -mutate(p_key = map_chr(key,~ rgbif::dataset_get(uuid=.x)$publishingOrganizationKey)) %>% -mutate(iso2 = map_chr(p_key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -select(-p_key) %>% -glimpse() - -# combine tables -tt = rbind(pp,dd) %>% -arrange(name) %>% -select(Company, `Activity sector`, Country = iso2, Datasets, `Occurrence records`, `Data citations`) - -# save csv and clean up -tt %>% -mutate(`Datasets` = trimws(format(`Datasets`, nsmall=0, big.mark="\u202F"),which ="left")) %>% -mutate(`Occurrence records` = trimws(format(`Occurrence records`, nsmall=0, big.mark="\u202F"),which ="left")) %>% -mutate(`Data citations` = trimws(format(`Data citations`, nsmall=0, big.mark="\u202F"),which ="left")) %>% -mutate(`Activity sector` = paste0("{",`Activity sector`,"}")) %>% -mutate(`Country` = paste0("{",`Country`,"}")) %>% -write.table(file = "250-private-sector-table-test.csv", row.names = FALSE, col.names = FALSE, sep = ",", quote = FALSE) - -# totals table - -# save_file_totals = "260-private-sector-totals.adoc" - -tt %>% -summarise( -Datasets = sum(Datasets), -`Occurrence records` = sum(`Occurrence records`), -`Data citations` = sum(`Data citations`) -) %>% -mutate(`Datasets` = trimws(format(`Datasets`, nsmall=0, big.mark=","),which ="left")) %>% -mutate(`Occurrence records` = trimws(format(`Occurrence records`, nsmall=0, big.mark=","),which ="left")) %>% -mutate(`Data citations` = trimws(format(`Data citations`, nsmall=0, big.mark=","),which ="left")) diff --git a/.history/build-table-script/R/table_20240704133603.R b/.history/build-table-script/R/table_20240704133603.R deleted file mode 100644 index 076bbff..0000000 --- a/.history/build-table-script/R/table_20240704133603.R +++ /dev/null @@ -1,83 +0,0 @@ - -library(dplyr) -library(purrr) -library(gbifmt) # my library -# setwd("C:/Users/ftw712/Desktop/doc-private-sector-data-publishing/") - -# harvest publishers and datasets private sector publishers directly from machineTags - -# dataset machine tags -ds_mt = get_mt("privateSector.gbif.org",type="dataset",limit=500) %>% -mutate(link = paste0("https://www.gbif.org/dataset/",uuid)) %>% -mutate(pd = "dataset") %>% -select(link, `Activity sector` = value, pd, key = uuid) %>% -glimpse() - -# publisher machine tags -pb_mt = get_mt("privateSector.gbif.org",type="organization",limit=500) %>% -mutate(link = paste0("https://www.gbif.org/publisher/",uuid)) %>% -mutate(pd = "publisher") %>% -select(link, `Activity sector` = value, pd, key = uuid) %>% -glimpse() - -# combine -ss = list(ds_mt,pb_mt) %>% -bind_rows() %>% -glimpse() - -gbif_country = rgbif::enumeration_country() %>% select(Country=title,iso2) %>% glimpse() - -pp = ss %>% -dplyr::filter(pd == "publisher") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::organizations(uuid=.x,limit=1)$data$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ rgbif::occ_search(publishingOrg = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = map_dbl(key,~rgbif::dataset_search(publishingOrg= .x,limit=0)$meta$count)) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(publishingOrg = .x))) %>% -mutate(Company = paste0("https://www.gbif.org/publisher/",key,"[",name,"]")) %>% -mutate(iso2 = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -glimpse() - -dd = ss %>% -dplyr::filter(pd == "dataset") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::dataset_get(uuid=.x)$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ -rgbif::occ_search(datasetKey = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = 1) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(datasetKey = .x))) %>% -mutate(Company = paste0("(https://www.gbif.org/dataset/",key,")[",name,"]")) %>% -mutate(p_key = map_chr(key,~ rgbif::dataset_get(uuid=.x)$publishingOrganizationKey)) %>% -mutate(iso2 = map_chr(p_key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -select(-p_key) %>% -glimpse() - -# combine tables -tt = rbind(pp,dd) %>% -arrange(name) %>% -select(Company, `Activity sector`, Country = iso2, Datasets, `Occurrence records`, `Data citations`) - -# save csv and clean up -tt %>% -mutate(`Datasets` = trimws(format(`Datasets`, nsmall=0, big.mark="\u202F"),which ="left")) %>% -mutate(`Occurrence records` = trimws(format(`Occurrence records`, nsmall=0, big.mark="\u202F"),which ="left")) %>% -mutate(`Data citations` = trimws(format(`Data citations`, nsmall=0, big.mark="\u202F"),which ="left")) %>% -mutate(`Activity sector` = paste0("{",`Activity sector`,"}")) %>% -mutate(`Country` = paste0("{",`Country`,"}")) %>% -write.table(file = "250-private-sector-table-test.csv", row.names = FALSE, col.names = FALSE, sep = ",", quote = FALSE) - -# totals table - -# save_file_totals = "260-private-sector-totals.adoc" - -tt %>% -summarise( -Datasets = sum(Datasets), -`Occurrence records` = sum(`Occurrence records`), -`Data citations` = sum(`Data citations`) -) %>% -mutate(`Datasets` = trimws(format(`Datasets`, nsmall=0, big.mark="\u202F"),which ="left")) %>% -mutate(`Occurrence records` = trimws(format(`Occurrence records`, nsmall=0, big.mark="\u202F"),which ="left")) %>% -mutate(`Data citations` = trimws(format(`Data citations`, nsmall=0, big.mark="\u202F"),which ="left")) diff --git a/.history/build-table-script/R/table_20240704134658.R b/.history/build-table-script/R/table_20240704134658.R deleted file mode 100644 index 8212532..0000000 --- a/.history/build-table-script/R/table_20240704134658.R +++ /dev/null @@ -1,82 +0,0 @@ - -library(dplyr) -library(purrr) -library(gbifmt) # my library -# setwd("C:/Users/ftw712/Desktop/doc-private-sector-data-publishing/") - -# harvest publishers and datasets private sector publishers directly from machineTags - -# dataset machine tags -ds_mt = get_mt("privateSector.gbif.org",type="dataset",limit=500) %>% -mutate(link = paste0("https://www.gbif.org/dataset/",uuid)) %>% -mutate(pd = "dataset") %>% -select(link, `Activity sector` = value, pd, key = uuid) %>% -glimpse() - -# publisher machine tags -pb_mt = get_mt("privateSector.gbif.org",type="organization",limit=500) %>% -mutate(link = paste0("https://www.gbif.org/publisher/",uuid)) %>% -mutate(pd = "publisher") %>% -select(link, `Activity sector` = value, pd, key = uuid) %>% -glimpse() - -# combine -ss = list(ds_mt,pb_mt) %>% -bind_rows() %>% -glimpse() - -gbif_country = rgbif::enumeration_country() %>% select(Country=title,iso2) %>% glimpse() - -pp = ss %>% -dplyr::filter(pd == "publisher") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::organizations(uuid=.x,limit=1)$data$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ rgbif::occ_search(publishingOrg = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = map_dbl(key,~rgbif::dataset_search(publishingOrg= .x,limit=0)$meta$count)) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(publishingOrg = .x))) %>% -mutate(Company = paste0("https://www.gbif.org/publisher/",key,"[",name,"]")) %>% -mutate(iso2 = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -glimpse() - -dd = ss %>% -dplyr::filter(pd == "dataset") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::dataset_get(uuid=.x)$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ -rgbif::occ_search(datasetKey = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = 1) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(datasetKey = .x))) %>% -mutate(Company = paste0("(https://www.gbif.org/dataset/",key,")[",name,"]")) %>% -mutate(p_key = map_chr(key,~ rgbif::dataset_get(uuid=.x)$publishingOrganizationKey)) %>% -mutate(iso2 = map_chr(p_key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -select(-p_key) %>% -glimpse() - -# combine tables -tt = rbind(pp,dd) %>% -arrange(name) %>% -select(Company, `Activity sector`, Country = iso2, Datasets, `Occurrence records`, `Data citations`) - -# save csv and clean up -tt %>% -mutate(`Datasets` = trimws(format(`Datasets`, nsmall=0, big.mark="\u202F"),which ="left")) %>% -mutate(`Occurrence records` = trimws(format(`Occurrence records`, nsmall=0, big.mark="\u202F"),which ="left")) %>% -mutate(`Data citations` = trimws(format(`Data citations`, nsmall=0, big.mark="\u202F"),which ="left")) %>% -mutate(`Activity sector` = paste0("{",`Activity sector`,"}")) %>% -mutate(`Country` = paste0("{",`Country`,"}")) %>% -write.table(file = "250-private-sector-table-test.csv", row.names = FALSE, col.names = FALSE, sep = ",", quote = FALSE) - -# totals table -tt %>% -summarise( -Datasets = sum(Datasets), -`Occurrence records` = sum(`Occurrence records`), -`Data citations` = sum(`Data citations`) -) %>% -mutate(`Datasets` = trimws(format(`Datasets`, nsmall=0, big.mark="\u202F"),which ="left")) %>% -mutate(`Occurrence records` = trimws(format(`Occurrence records`, nsmall=0, big.mark="\u202F"),which ="left")) %>% -mutate(`Data citations` = trimws(format(`Data citations`, nsmall=0, big.mark="\u202F"),which ="left")) -write.table(file = ""260-private-sector-totals.csv", row.names = FALSE, col.names = FALSE, sep = ",", quote = FALSE) -# save_file_totals = "260-private-sector-totals.adoc" diff --git a/.history/build-table-script/R/table_20240704134713.R b/.history/build-table-script/R/table_20240704134713.R deleted file mode 100644 index 5e6f44d..0000000 --- a/.history/build-table-script/R/table_20240704134713.R +++ /dev/null @@ -1,81 +0,0 @@ - -library(dplyr) -library(purrr) -library(gbifmt) # my library -# setwd("C:/Users/ftw712/Desktop/doc-private-sector-data-publishing/") - -# harvest publishers and datasets private sector publishers directly from machineTags - -# dataset machine tags -ds_mt = get_mt("privateSector.gbif.org",type="dataset",limit=500) %>% -mutate(link = paste0("https://www.gbif.org/dataset/",uuid)) %>% -mutate(pd = "dataset") %>% -select(link, `Activity sector` = value, pd, key = uuid) %>% -glimpse() - -# publisher machine tags -pb_mt = get_mt("privateSector.gbif.org",type="organization",limit=500) %>% -mutate(link = paste0("https://www.gbif.org/publisher/",uuid)) %>% -mutate(pd = "publisher") %>% -select(link, `Activity sector` = value, pd, key = uuid) %>% -glimpse() - -# combine -ss = list(ds_mt,pb_mt) %>% -bind_rows() %>% -glimpse() - -gbif_country = rgbif::enumeration_country() %>% select(Country=title,iso2) %>% glimpse() - -pp = ss %>% -dplyr::filter(pd == "publisher") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::organizations(uuid=.x,limit=1)$data$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ rgbif::occ_search(publishingOrg = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = map_dbl(key,~rgbif::dataset_search(publishingOrg= .x,limit=0)$meta$count)) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(publishingOrg = .x))) %>% -mutate(Company = paste0("https://www.gbif.org/publisher/",key,"[",name,"]")) %>% -mutate(iso2 = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -glimpse() - -dd = ss %>% -dplyr::filter(pd == "dataset") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::dataset_get(uuid=.x)$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ -rgbif::occ_search(datasetKey = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = 1) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(datasetKey = .x))) %>% -mutate(Company = paste0("(https://www.gbif.org/dataset/",key,")[",name,"]")) %>% -mutate(p_key = map_chr(key,~ rgbif::dataset_get(uuid=.x)$publishingOrganizationKey)) %>% -mutate(iso2 = map_chr(p_key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -select(-p_key) %>% -glimpse() - -# combine tables -tt = rbind(pp,dd) %>% -arrange(name) %>% -select(Company, `Activity sector`, Country = iso2, Datasets, `Occurrence records`, `Data citations`) - -# save csv and clean up -tt %>% -mutate(`Datasets` = trimws(format(`Datasets`, nsmall=0, big.mark="\u202F"),which ="left")) %>% -mutate(`Occurrence records` = trimws(format(`Occurrence records`, nsmall=0, big.mark="\u202F"),which ="left")) %>% -mutate(`Data citations` = trimws(format(`Data citations`, nsmall=0, big.mark="\u202F"),which ="left")) %>% -mutate(`Activity sector` = paste0("{",`Activity sector`,"}")) %>% -mutate(`Country` = paste0("{",`Country`,"}")) %>% -write.table(file = "250-private-sector-table-test.csv", row.names = FALSE, col.names = FALSE, sep = ",", quote = FALSE) - -# totals table -tt %>% -summarise( -Datasets = sum(Datasets), -`Occurrence records` = sum(`Occurrence records`), -`Data citations` = sum(`Data citations`) -) %>% -mutate(`Datasets` = trimws(format(`Datasets`, nsmall=0, big.mark="\u202F"),which ="left")) %>% -mutate(`Occurrence records` = trimws(format(`Occurrence records`, nsmall=0, big.mark="\u202F"),which ="left")) %>% -mutate(`Data citations` = trimws(format(`Data citations`, nsmall=0, big.mark="\u202F"),which ="left")) -write.table(file = "260-private-sector-totals.csv", row.names = FALSE, col.names = FALSE, sep = ",", quote = FALSE) %>% diff --git a/.history/build-table-script/R/table_20240704134726.R b/.history/build-table-script/R/table_20240704134726.R deleted file mode 100644 index 758e67d..0000000 --- a/.history/build-table-script/R/table_20240704134726.R +++ /dev/null @@ -1,81 +0,0 @@ - -library(dplyr) -library(purrr) -library(gbifmt) # my library -# setwd("C:/Users/ftw712/Desktop/doc-private-sector-data-publishing/") - -# harvest publishers and datasets private sector publishers directly from machineTags - -# dataset machine tags -ds_mt = get_mt("privateSector.gbif.org",type="dataset",limit=500) %>% -mutate(link = paste0("https://www.gbif.org/dataset/",uuid)) %>% -mutate(pd = "dataset") %>% -select(link, `Activity sector` = value, pd, key = uuid) %>% -glimpse() - -# publisher machine tags -pb_mt = get_mt("privateSector.gbif.org",type="organization",limit=500) %>% -mutate(link = paste0("https://www.gbif.org/publisher/",uuid)) %>% -mutate(pd = "publisher") %>% -select(link, `Activity sector` = value, pd, key = uuid) %>% -glimpse() - -# combine -ss = list(ds_mt,pb_mt) %>% -bind_rows() %>% -glimpse() - -gbif_country = rgbif::enumeration_country() %>% select(Country=title,iso2) %>% glimpse() - -pp = ss %>% -dplyr::filter(pd == "publisher") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::organizations(uuid=.x,limit=1)$data$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ rgbif::occ_search(publishingOrg = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = map_dbl(key,~rgbif::dataset_search(publishingOrg= .x,limit=0)$meta$count)) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(publishingOrg = .x))) %>% -mutate(Company = paste0("https://www.gbif.org/publisher/",key,"[",name,"]")) %>% -mutate(iso2 = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -glimpse() - -dd = ss %>% -dplyr::filter(pd == "dataset") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::dataset_get(uuid=.x)$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ -rgbif::occ_search(datasetKey = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = 1) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(datasetKey = .x))) %>% -mutate(Company = paste0("(https://www.gbif.org/dataset/",key,")[",name,"]")) %>% -mutate(p_key = map_chr(key,~ rgbif::dataset_get(uuid=.x)$publishingOrganizationKey)) %>% -mutate(iso2 = map_chr(p_key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -select(-p_key) %>% -glimpse() - -# combine tables -tt = rbind(pp,dd) %>% -arrange(name) %>% -select(Company, `Activity sector`, Country = iso2, Datasets, `Occurrence records`, `Data citations`) - -# save csv and clean up -tt %>% -mutate(`Datasets` = trimws(format(`Datasets`, nsmall=0, big.mark="\u202F"),which ="left")) %>% -mutate(`Occurrence records` = trimws(format(`Occurrence records`, nsmall=0, big.mark="\u202F"),which ="left")) %>% -mutate(`Data citations` = trimws(format(`Data citations`, nsmall=0, big.mark="\u202F"),which ="left")) %>% -mutate(`Activity sector` = paste0("{",`Activity sector`,"}")) %>% -mutate(`Country` = paste0("{",`Country`,"}")) %>% -write.table(file = "250-private-sector-table-test.csv", row.names = FALSE, col.names = FALSE, sep = ",", quote = FALSE) - -# totals table -tt %>% -summarise( -Datasets = sum(Datasets), -`Occurrence records` = sum(`Occurrence records`), -`Data citations` = sum(`Data citations`) -) %>% -mutate(`Datasets` = trimws(format(`Datasets`, nsmall=0, big.mark="\u202F"),which ="left")) %>% -mutate(`Occurrence records` = trimws(format(`Occurrence records`, nsmall=0, big.mark="\u202F"),which ="left")) %>% -mutate(`Data citations` = trimws(format(`Data citations`, nsmall=0, big.mark="\u202F"),which ="left")) -write.table(file = "260-private-sector-totals.csv", row.names = FALSE, col.names = FALSE, sep = ",", quote = FALSE) diff --git a/.history/build-table-script/R/table_20240704134736.R b/.history/build-table-script/R/table_20240704134736.R deleted file mode 100644 index e977932..0000000 --- a/.history/build-table-script/R/table_20240704134736.R +++ /dev/null @@ -1,81 +0,0 @@ - -library(dplyr) -library(purrr) -library(gbifmt) # my library -# setwd("C:/Users/ftw712/Desktop/doc-private-sector-data-publishing/") - -# harvest publishers and datasets private sector publishers directly from machineTags - -# dataset machine tags -ds_mt = get_mt("privateSector.gbif.org",type="dataset",limit=500) %>% -mutate(link = paste0("https://www.gbif.org/dataset/",uuid)) %>% -mutate(pd = "dataset") %>% -select(link, `Activity sector` = value, pd, key = uuid) %>% -glimpse() - -# publisher machine tags -pb_mt = get_mt("privateSector.gbif.org",type="organization",limit=500) %>% -mutate(link = paste0("https://www.gbif.org/publisher/",uuid)) %>% -mutate(pd = "publisher") %>% -select(link, `Activity sector` = value, pd, key = uuid) %>% -glimpse() - -# combine -ss = list(ds_mt,pb_mt) %>% -bind_rows() %>% -glimpse() - -gbif_country = rgbif::enumeration_country() %>% select(Country=title,iso2) %>% glimpse() - -pp = ss %>% -dplyr::filter(pd == "publisher") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::organizations(uuid=.x,limit=1)$data$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ rgbif::occ_search(publishingOrg = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = map_dbl(key,~rgbif::dataset_search(publishingOrg= .x,limit=0)$meta$count)) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(publishingOrg = .x))) %>% -mutate(Company = paste0("https://www.gbif.org/publisher/",key,"[",name,"]")) %>% -mutate(iso2 = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -glimpse() - -dd = ss %>% -dplyr::filter(pd == "dataset") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::dataset_get(uuid=.x)$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ -rgbif::occ_search(datasetKey = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = 1) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(datasetKey = .x))) %>% -mutate(Company = paste0("(https://www.gbif.org/dataset/",key,")[",name,"]")) %>% -mutate(p_key = map_chr(key,~ rgbif::dataset_get(uuid=.x)$publishingOrganizationKey)) %>% -mutate(iso2 = map_chr(p_key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -select(-p_key) %>% -glimpse() - -# combine tables -tt = rbind(pp,dd) %>% -arrange(name) %>% -select(Company, `Activity sector`, Country = iso2, Datasets, `Occurrence records`, `Data citations`) - -# save csv and clean up -tt %>% -mutate(`Datasets` = trimws(format(`Datasets`, nsmall=0, big.mark="\u202F"),which ="left")) %>% -mutate(`Occurrence records` = trimws(format(`Occurrence records`, nsmall=0, big.mark="\u202F"),which ="left")) %>% -mutate(`Data citations` = trimws(format(`Data citations`, nsmall=0, big.mark="\u202F"),which ="left")) %>% -mutate(`Activity sector` = paste0("{",`Activity sector`,"}")) %>% -mutate(`Country` = paste0("{",`Country`,"}")) %>% -write.table(file = "250-private-sector-table-test.csv", row.names = FALSE, col.names = FALSE, sep = ",", quote = FALSE) - -# totals table -tt %>% -summarise( -Datasets = sum(Datasets), -`Occurrence records` = sum(`Occurrence records`), -`Data citations` = sum(`Data citations`) -) %>% -mutate(`Datasets` = trimws(format(`Datasets`, nsmall=0, big.mark="\u202F"),which ="left")) %>% -mutate(`Occurrence records` = trimws(format(`Occurrence records`, nsmall=0, big.mark="\u202F"),which ="left")) %>% -mutate(`Data citations` = trimws(format(`Data citations`, nsmall=0, big.mark="\u202F"),which ="left")) %>% -write.table(file = "260-private-sector-totals.csv", row.names = FALSE, col.names = FALSE, sep = ",", quote = FALSE) diff --git a/.history/build-table-script/R/table_20240704134738.R b/.history/build-table-script/R/table_20240704134738.R deleted file mode 100644 index e977932..0000000 --- a/.history/build-table-script/R/table_20240704134738.R +++ /dev/null @@ -1,81 +0,0 @@ - -library(dplyr) -library(purrr) -library(gbifmt) # my library -# setwd("C:/Users/ftw712/Desktop/doc-private-sector-data-publishing/") - -# harvest publishers and datasets private sector publishers directly from machineTags - -# dataset machine tags -ds_mt = get_mt("privateSector.gbif.org",type="dataset",limit=500) %>% -mutate(link = paste0("https://www.gbif.org/dataset/",uuid)) %>% -mutate(pd = "dataset") %>% -select(link, `Activity sector` = value, pd, key = uuid) %>% -glimpse() - -# publisher machine tags -pb_mt = get_mt("privateSector.gbif.org",type="organization",limit=500) %>% -mutate(link = paste0("https://www.gbif.org/publisher/",uuid)) %>% -mutate(pd = "publisher") %>% -select(link, `Activity sector` = value, pd, key = uuid) %>% -glimpse() - -# combine -ss = list(ds_mt,pb_mt) %>% -bind_rows() %>% -glimpse() - -gbif_country = rgbif::enumeration_country() %>% select(Country=title,iso2) %>% glimpse() - -pp = ss %>% -dplyr::filter(pd == "publisher") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::organizations(uuid=.x,limit=1)$data$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ rgbif::occ_search(publishingOrg = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = map_dbl(key,~rgbif::dataset_search(publishingOrg= .x,limit=0)$meta$count)) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(publishingOrg = .x))) %>% -mutate(Company = paste0("https://www.gbif.org/publisher/",key,"[",name,"]")) %>% -mutate(iso2 = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -glimpse() - -dd = ss %>% -dplyr::filter(pd == "dataset") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::dataset_get(uuid=.x)$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ -rgbif::occ_search(datasetKey = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = 1) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(datasetKey = .x))) %>% -mutate(Company = paste0("(https://www.gbif.org/dataset/",key,")[",name,"]")) %>% -mutate(p_key = map_chr(key,~ rgbif::dataset_get(uuid=.x)$publishingOrganizationKey)) %>% -mutate(iso2 = map_chr(p_key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -select(-p_key) %>% -glimpse() - -# combine tables -tt = rbind(pp,dd) %>% -arrange(name) %>% -select(Company, `Activity sector`, Country = iso2, Datasets, `Occurrence records`, `Data citations`) - -# save csv and clean up -tt %>% -mutate(`Datasets` = trimws(format(`Datasets`, nsmall=0, big.mark="\u202F"),which ="left")) %>% -mutate(`Occurrence records` = trimws(format(`Occurrence records`, nsmall=0, big.mark="\u202F"),which ="left")) %>% -mutate(`Data citations` = trimws(format(`Data citations`, nsmall=0, big.mark="\u202F"),which ="left")) %>% -mutate(`Activity sector` = paste0("{",`Activity sector`,"}")) %>% -mutate(`Country` = paste0("{",`Country`,"}")) %>% -write.table(file = "250-private-sector-table-test.csv", row.names = FALSE, col.names = FALSE, sep = ",", quote = FALSE) - -# totals table -tt %>% -summarise( -Datasets = sum(Datasets), -`Occurrence records` = sum(`Occurrence records`), -`Data citations` = sum(`Data citations`) -) %>% -mutate(`Datasets` = trimws(format(`Datasets`, nsmall=0, big.mark="\u202F"),which ="left")) %>% -mutate(`Occurrence records` = trimws(format(`Occurrence records`, nsmall=0, big.mark="\u202F"),which ="left")) %>% -mutate(`Data citations` = trimws(format(`Data citations`, nsmall=0, big.mark="\u202F"),which ="left")) %>% -write.table(file = "260-private-sector-totals.csv", row.names = FALSE, col.names = FALSE, sep = ",", quote = FALSE) diff --git a/.history/build-table-script/R/table_20240704135018.R b/.history/build-table-script/R/table_20240704135018.R deleted file mode 100644 index b55a036..0000000 --- a/.history/build-table-script/R/table_20240704135018.R +++ /dev/null @@ -1,81 +0,0 @@ - -library(dplyr) -library(purrr) -library(gbifmt) # my library -# setwd("C:/Users/ftw712/Desktop/doc-private-sector-data-publishing/") - -# harvest publishers and datasets private sector publishers directly from machineTags - -# dataset machine tags -ds_mt = get_mt("privateSector.gbif.org",type="dataset",limit=500) %>% -mutate(link = paste0("https://www.gbif.org/dataset/",uuid)) %>% -mutate(pd = "dataset") %>% -select(link, `Activity sector` = value, pd, key = uuid) %>% -glimpse() - -# publisher machine tags -pb_mt = get_mt("privateSector.gbif.org",type="organization",limit=500) %>% -mutate(link = paste0("https://www.gbif.org/publisher/",uuid)) %>% -mutate(pd = "publisher") %>% -select(link, `Activity sector` = value, pd, key = uuid) %>% -glimpse() - -# combine -ss = list(ds_mt,pb_mt) %>% -bind_rows() %>% -glimpse() - -gbif_country = rgbif::enumeration_country() %>% select(Country=title,iso2) %>% glimpse() - -pp = ss %>% -dplyr::filter(pd == "publisher") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::organizations(uuid=.x,limit=1)$data$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ rgbif::occ_search(publishingOrg = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = map_dbl(key,~rgbif::dataset_search(publishingOrg= .x,limit=0)$meta$count)) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(publishingOrg = .x))) %>% -mutate(Company = paste0("https://www.gbif.org/publisher/",key,"[",name,"]")) %>% -mutate(iso2 = map_chr(key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -glimpse() - -dd = ss %>% -dplyr::filter(pd == "dataset") %>% -select(key,`Activity sector`) %>% -mutate(name = map_chr(key,~rgbif::dataset_get(uuid=.x)$title)) %>% -mutate(`Occurrence records` = map_dbl(key,~ -rgbif::occ_search(datasetKey = .x,occurrenceStatus=NULL,limit=0)$meta$count)) %>% -mutate(Datasets = 1) %>% -mutate(`Data citations` = map_dbl(key,~rgbif::lit_count(datasetKey = .x))) %>% -mutate(Company = paste0("(https://www.gbif.org/dataset/",key,")[",name,"]")) %>% -mutate(p_key = map_chr(key,~ rgbif::dataset_get(uuid=.x)$publishingOrganizationKey)) %>% -mutate(iso2 = map_chr(p_key,~rgbif::dataset_search(publishingOrg=.x,limit=1)$data$publishingCountry)) %>% -merge(gbif_country,by="iso2") %>% -select(-p_key) %>% -glimpse() - -# combine tables -tt = rbind(pp,dd) %>% -arrange(name) %>% -select(Company, `Activity sector`, Country = iso2, Datasets, `Occurrence records`, `Data citations`) - -# save csv and clean up -tt %>% -mutate(`Datasets` = trimws(format(`Datasets`, nsmall=0, big.mark="\u202F"),which ="left")) %>% -mutate(`Occurrence records` = trimws(format(`Occurrence records`, nsmall=0, big.mark="\u202F"),which ="left")) %>% -mutate(`Data citations` = trimws(format(`Data citations`, nsmall=0, big.mark="\u202F"),which ="left")) %>% -mutate(`Activity sector` = paste0("{",`Activity sector`,"}")) %>% -mutate(`Country` = paste0("{",`Country`,"}")) %>% -write.table(file = "250-private-sector-table.csv", row.names = FALSE, col.names = FALSE, sep = ",", quote = FALSE) - -# totals table -tt %>% -summarise( -Datasets = sum(Datasets), -`Occurrence records` = sum(`Occurrence records`), -`Data citations` = sum(`Data citations`) -) %>% -mutate(`Datasets` = trimws(format(`Datasets`, nsmall=0, big.mark="\u202F"),which ="left")) %>% -mutate(`Occurrence records` = trimws(format(`Occurrence records`, nsmall=0, big.mark="\u202F"),which ="left")) %>% -mutate(`Data citations` = trimws(format(`Data citations`, nsmall=0, big.mark="\u202F"),which ="left")) %>% -write.table(file = "260-private-sector-totals.csv", row.names = FALSE, col.names = FALSE, sep = ",", quote = FALSE) From 750a4f3753c58c004366eedb6ea0c704ab9411a2 Mon Sep 17 00:00:00 2001 From: MattBlissett Date: Mon, 15 Jul 2024 09:05:09 +0000 Subject: [PATCH 3/3] Apply automatic changes --- 250-private-sector-table.csv | 78 +++++++++++++++++------------------ 260-private-sector-totals.csv | 2 +- 2 files changed, 40 insertions(+), 40 deletions(-) diff --git a/250-private-sector-table.csv b/250-private-sector-table.csv index 6daadca..dc7932b 100644 --- a/250-private-sector-table.csv +++ b/250-private-sector-table.csv @@ -3,60 +3,60 @@ https://www.gbif.org/publisher/f2429cd1-4d45-475c-852a-892024cb4aba[ARC - Arctic https://www.gbif.org/publisher/6d1beb45-43bc-499a-85a0-f06f67e81591[Aguas de Bogotá S.A. E.S.P.],{Utilities},{CO},1,13 280,107 https://www.gbif.org/publisher/620e3d31-d433-4154-9cf6-232a6a6b5e3f[Akvaplan-niva],{Consulting},{NO},3,594,25 https://www.gbif.org/publisher/b5904aaf-02c7-4ff3-85a6-0f528dbb632e[Anadarko Colombia Company],{Energy},{CO},7,1 178,57 -https://www.gbif.org/publisher/df604473-66f0-444d-94c4-22795f268afe[AngloGold Ashanti Colombia S.A.S],{Materials},{CO},5,87 020,196 -https://www.gbif.org/publisher/612c9b58-e739-4af4-a038-4b3901fa5649[Asplan Viak AS],{Engineering},{NO},14,3 775,440 -https://www.gbif.org/publisher/e62a5313-e771-4c81-b6d1-cba6e4085635[Aures Bajo],{Energy},{CO},2,368,47 +https://www.gbif.org/publisher/df604473-66f0-444d-94c4-22795f268afe[AngloGold Ashanti Colombia S.A.S],{Materials},{CO},5,87 020,198 +https://www.gbif.org/publisher/612c9b58-e739-4af4-a038-4b3901fa5649[Asplan Viak AS],{Engineering},{NO},14,3 775,443 +https://www.gbif.org/publisher/e62a5313-e771-4c81-b6d1-cba6e4085635[Aures Bajo],{Energy},{CO},2,368,48 https://www.gbif.org/publisher/83500190-21b6-445c-ab2c-c0565fc0afce[Awake Travel],{Consulting},{CO},1,8 644,21 https://www.gbif.org/publisher/eea64f26-8fd5-49fb-be7e-a1d4cfc051ee[Aïgos SAS],{Consulting},{CO},3,2 404,48 -https://www.gbif.org/publisher/b2c1126d-e3b4-4619-9f94-b236dcc0a947[Biofokus],{Consulting},{NO},1,444 289,1 133 -https://www.gbif.org/publisher/a41046bd-eaca-49bf-919b-419062ffc2a2[Biolog J.B. Jordal AS],{Consulting},{NO},1,177 814,684 -https://www.gbif.org/publisher/8e6bc843-c1b4-4b10-b546-881f06049004[Biotica Consultores Ltda],{Consulting},{CO},4,1 318,219 -https://www.gbif.org/publisher/14fb9c57-68a5-4870-b434-5355df7a9c3c[Carbones del Cerrejón Limited],{Materials},{CO},9,197 100,279 -https://www.gbif.org/publisher/0fd86a13-3d0d-4d6e-b809-2811706f35d6[Celsia Colombia S.A. E.S.P.],{Energy},{CO},11,55 792,63 +https://www.gbif.org/publisher/b2c1126d-e3b4-4619-9f94-b236dcc0a947[Biofokus],{Consulting},{NO},1,674 127,1 136 +https://www.gbif.org/publisher/a41046bd-eaca-49bf-919b-419062ffc2a2[Biolog J.B. Jordal AS],{Consulting},{NO},1,177 814,688 +https://www.gbif.org/publisher/8e6bc843-c1b4-4b10-b546-881f06049004[Biotica Consultores Ltda],{Consulting},{CO},4,1 318,221 +https://www.gbif.org/publisher/14fb9c57-68a5-4870-b434-5355df7a9c3c[Carbones del Cerrejón Limited],{Materials},{CO},9,197 100,281 +https://www.gbif.org/publisher/0fd86a13-3d0d-4d6e-b809-2811706f35d6[Celsia Colombia S.A. E.S.P.],{Energy},{CO},11,55 792,64 https://www.gbif.org/publisher/bbf93124-1cc2-4cac-a101-b4412dd04e2a[Central Hidroeléctrica de Caldas S.A E.S.P],{Energy},{CO},2,3 570,48 -https://www.gbif.org/publisher/1a4f4e64-eb3d-42c3-a359-1be3869b3a20[Cerro Matoso S.A],{Materials},{CO},3,19 309,205 +https://www.gbif.org/publisher/1a4f4e64-eb3d-42c3-a359-1be3869b3a20[Cerro Matoso S.A],{Materials},{CO},3,19 309,207 https://www.gbif.org/publisher/d49251f5-379f-43b4-b747-9d8240334fa5[Chevron Australia],{Energy},{AU},1,2 048,68 -https://www.gbif.org/publisher/03a8bc52-9c2e-4aee-8dd7-9b4d279e4960[Compensation International Progress S.A. -Ciprogress Greenlife-],{Industrials},{CO},1,820,77 +https://www.gbif.org/publisher/03a8bc52-9c2e-4aee-8dd7-9b4d279e4960[Compensation International Progress S.A. -Ciprogress Greenlife-],{Industrials},{CO},1,820,78 https://www.gbif.org/publisher/db41c5c6-d34a-4d27-8ac9-0c8d085393f7[Concesión La Pintada S.A.S],{Industrials},{CO},2,0,0 -https://www.gbif.org/publisher/d3c29fed-bcac-4f84-8d3d-f4b7f76fdc8e[Construcciones y Ambiente Conambiente S.A.S],{Consulting},{CO},10,4 392,82 +https://www.gbif.org/publisher/d3c29fed-bcac-4f84-8d3d-f4b7f76fdc8e[Construcciones y Ambiente Conambiente S.A.S],{Consulting},{CO},10,4 392,84 https://www.gbif.org/publisher/c5245889-c63d-48fa-ae4b-90ddd74f1d2d[Cunaguaro Consultores LTDA],{Consulting},{CO},1,657,67 -https://www.gbif.org/publisher/efc5d3c7-2fec-42dd-85de-078a73973bd1[DNV],{Energy},{NO},1,2 372 473,73 -https://www.gbif.org/publisher/e5150835-f502-424c-b470-24dd496b1b18[EDP],{Energy},{PT},120,1 855 649,475 +https://www.gbif.org/publisher/efc5d3c7-2fec-42dd-85de-078a73973bd1[DNV],{Energy},{NO},1,2 372 473,74 +https://www.gbif.org/publisher/e5150835-f502-424c-b470-24dd496b1b18[EDP],{Energy},{PT},120,1 855 649,477 https://www.gbif.org/publisher/76c3443b-bf10-4fb6-a6e7-aeaa65be383c[ENGIE],{Energy},{FR},20,29 555,10 -https://www.gbif.org/publisher/fac91b96-c087-460f-ab01-b808f341c2f5[Ecofact],{Consulting},{NO},3,12 508,482 +https://www.gbif.org/publisher/fac91b96-c087-460f-ab01-b808f341c2f5[Ecofact],{Consulting},{NO},3,12 508,486 https://www.gbif.org/publisher/3ca2ab24-7f53-458e-b4ad-6e88ea6d9628[Econativa Consultores SpA],{Consulting},{CL},1,3,12 -https://www.gbif.org/publisher/d5ef14a1-5177-4547-9ce2-46d84a4214eb[Ecopetrol S.A.],{Energy},{CO},89,736 568,204 -https://www.gbif.org/publisher/d42b7e5d-a3e5-4fc2-8b3d-105336d70898[Empresas Públicas de Medellín E.S.P.],{Energy},{CO},48,2 278 898,224 -https://www.gbif.org/publisher/f442f96e-2017-4cf5-b19f-1f3320ae7577[Enel Colombia],{Energy},{CO},13,31 192,78 +https://www.gbif.org/publisher/d5ef14a1-5177-4547-9ce2-46d84a4214eb[Ecopetrol S.A.],{Energy},{CO},89,736 568,206 +https://www.gbif.org/publisher/d42b7e5d-a3e5-4fc2-8b3d-105336d70898[Empresas Públicas de Medellín E.S.P.],{Energy},{CO},48,2 278 898,226 +https://www.gbif.org/publisher/f442f96e-2017-4cf5-b19f-1f3320ae7577[Enel Colombia],{Energy},{CO},13,31 192,80 https://www.gbif.org/publisher/51818adb-2745-4201-9397-6d6dc433954f[Equinor],{Energy},{NO},2,1 102,7 -https://www.gbif.org/publisher/d98d7029-8cb7-44c2-88af-52988adc3a62[Faun Naturforvaltning AS],{Consulting},{NO},1,3 787,428 +https://www.gbif.org/publisher/d98d7029-8cb7-44c2-88af-52988adc3a62[Faun Naturforvaltning AS],{Consulting},{NO},1,3 787,431 https://www.gbif.org/publisher/37c1c493-782c-4f53-914d-b1f66cdcf61c[Federación Nacional de Cacaoteros],{Agriculture},{CO},1,17,21 -https://www.gbif.org/publisher/fe602f47-b553-4291-b6e5-197b9837e167[Federación Nacional de Cafeteros de Colombia],{Agriculture},{CO},6,26 804,441 -https://www.gbif.org/publisher/2977895d-3ce2-4fb9-b62e-a775c8fd9304[Grupo Energía Bogotá],{Energy},{CO},1,61 111,162 -https://www.gbif.org/publisher/946b9adc-5ec0-4d76-a143-8bd43444415f[HBH Projekt spol. s r.o.,Kabátnikova 5, 602 00 Brno,ČR – organizačná zložka Slovensko],{Engineering},{SK},14,2 024,13 -https://www.gbif.org/publisher/90d2e455-c279-4bf1-ba87-806495641e18[Hatovial S.A.S],{Engineering},{CO},1,1 898,153 +https://www.gbif.org/publisher/fe602f47-b553-4291-b6e5-197b9837e167[Federación Nacional de Cafeteros de Colombia],{Agriculture},{CO},6,26 804,444 +https://www.gbif.org/publisher/2977895d-3ce2-4fb9-b62e-a775c8fd9304[Grupo Energía Bogotá],{Energy},{CO},1,61 111,164 +https://www.gbif.org/publisher/946b9adc-5ec0-4d76-a143-8bd43444415f[HBH Projekt spol. s r.o.,Kabátnikova 5, 602 00 Brno,ČR – organizačná zložka Slovensko],{Engineering},{SK},14,2 024,14 +https://www.gbif.org/publisher/90d2e455-c279-4bf1-ba87-806495641e18[Hatovial S.A.S],{Engineering},{CO},1,1 898,154 https://www.gbif.org/publisher/2d7ea901-0128-4a7a-8207-425020c1fd99[Holcim Spain],{Mining},{ES},2,35,46 https://www.gbif.org/publisher/67c63221-0c74-4c18-97f9-e2b2acb739ce[INERCO Consultoría Colombia],{Consulting},{CO},1,1 090,183 -https://www.gbif.org/publisher/04ce62dd-30ec-4d98-8b30-b09cafc3ac38[Isagen S.A. E.S.P.],{Energy},{CO},12,41 665,388 +https://www.gbif.org/publisher/04ce62dd-30ec-4d98-8b30-b09cafc3ac38[Isagen S.A. E.S.P.],{Energy},{CO},12,41 665,389 https://www.gbif.org/publisher/b1670923-c90b-4420-be96-1db600ed2109[Lake Tanganyika Floating Health Clinic],{Health Care},{CD},3,337,9 https://www.gbif.org/publisher/54eb018e-54d8-49cc-b98b-37733bb70028[Mineros Aluvial S.A.S. BIC],{Mining},{CO},1,7 307,34 -https://www.gbif.org/publisher/4d14137b-ce2c-4111-98a9-0078f5d53237[Minería Social Incluyente S.A.S.],{Mining},{CO},1,4 159,78 +https://www.gbif.org/publisher/4d14137b-ce2c-4111-98a9-0078f5d53237[Minería Social Incluyente S.A.S.],{Mining},{CO},1,4 159,79 https://www.gbif.org/publisher/9a21807b-b9c5-4071-b393-764f3cd58abc[Moam Monitoreos Ambientales S.A.S],{Consulting},{CO},1,1 781,69 -(https://www.gbif.org/dataset/d0a90634-21fb-4c76-9081-98bf3930ad7c)[Monitoramento fauna e flora Mineração Vale Verde do Brasil Ltda.],{Materials},{BR},1,299,148 -https://www.gbif.org/publisher/359ba517-ca03-46dd-9583-d2be73085c2f[Multiconsult],{Consulting},{NO},1,308,179 -https://www.gbif.org/publisher/a1648ebf-7363-4c27-beb0-23271087220f[NNI Resources AS],{Consulting},{NO},2,3 115,104 -https://www.gbif.org/publisher/99c6eaae-f15b-4656-a600-d0c50044962e[NaturRestaurering AS],{Consulting},{NO},10,17 609,322 -(https://www.gbif.org/dataset/72e23311-b65a-46d0-bc07-ff0a251b47e1)[Nature monitoring data, Amphi Consult and Biomedia, Denmark],{Consulting},{DK},1,47 254,25 +(https://www.gbif.org/dataset/d0a90634-21fb-4c76-9081-98bf3930ad7c)[Monitoramento fauna e flora Mineração Vale Verde do Brasil Ltda.],{Materials},{BR},1,299,149 +https://www.gbif.org/publisher/359ba517-ca03-46dd-9583-d2be73085c2f[Multiconsult],{Consulting},{NO},1,308,181 +https://www.gbif.org/publisher/a1648ebf-7363-4c27-beb0-23271087220f[NNI Resources AS],{Consulting},{NO},2,3 115,105 +https://www.gbif.org/publisher/99c6eaae-f15b-4656-a600-d0c50044962e[NaturRestaurering AS],{Consulting},{NO},10,17 609,325 +(https://www.gbif.org/dataset/72e23311-b65a-46d0-bc07-ff0a251b47e1)[Nature monitoring data, Amphi Consult and Biomedia, Denmark],{Consulting},{DK},1,47 254,26 https://www.gbif.org/publisher/52bd9c22-340b-480d-b414-73db37cd9379[Navantia, S.A.],{Industrials},{ES},6,823,33 https://www.gbif.org/publisher/4e8fae15-2ca7-4493-8c57-573194d29c0f[Nocturne Environmental Surveyors Ltd],{Consulting},{GB},1,32,28 -https://www.gbif.org/publisher/c3da1f49-b2c8-4751-b72f-28855546ec4c[Oleoducto Bicentenario],{Energy},{CO},11,4 161,273 -https://www.gbif.org/publisher/dbc07e15-c05b-4781-9ec3-59d331a9a4d8[Parex Resources Colombia - AG Sucursal],{Energy},{CO},17,215 099,20 -https://www.gbif.org/publisher/9a408a2b-6bbb-4c95-80d9-0dce1fba1c00[Pierre Fabre],{Consumer Staples},{FR},20,4 049,202 -https://www.gbif.org/publisher/dbc2ab56-d499-403c-8db5-c1a49cd0b75f[Promigas S.A E.S.P],{Energy},{CO},12,180 937,328 -https://www.gbif.org/publisher/815809f1-e6e6-44df-b3fd-b17a9d87eada[Regelink Ecology & Landscape],{Consulting},{NL},1,157 976,160 -https://www.gbif.org/publisher/80e15a76-70e8-417d-9111-b2e9e0dd8f18[Rådgivende Biologer],{Consulting},{NO},5,15 214,401 -https://www.gbif.org/publisher/c4444b2c-6b07-40c2-8474-6556a195cd40[SWECO Norge AS],{Engineering},{NO},1,1 139,407 +https://www.gbif.org/publisher/c3da1f49-b2c8-4751-b72f-28855546ec4c[Oleoducto Bicentenario],{Energy},{CO},11,4 161,276 +https://www.gbif.org/publisher/dbc07e15-c05b-4781-9ec3-59d331a9a4d8[Parex Resources Colombia - AG Sucursal],{Energy},{CO},17,215 099,21 +https://www.gbif.org/publisher/9a408a2b-6bbb-4c95-80d9-0dce1fba1c00[Pierre Fabre],{Consumer Staples},{FR},20,4 049,204 +https://www.gbif.org/publisher/dbc2ab56-d499-403c-8db5-c1a49cd0b75f[Promigas S.A E.S.P],{Energy},{CO},13,185 817,330 +https://www.gbif.org/publisher/815809f1-e6e6-44df-b3fd-b17a9d87eada[Regelink Ecology & Landscape],{Consulting},{NL},1,157 976,162 +https://www.gbif.org/publisher/80e15a76-70e8-417d-9111-b2e9e0dd8f18[Rådgivende Biologer],{Consulting},{NO},5,15 214,404 +https://www.gbif.org/publisher/c4444b2c-6b07-40c2-8474-6556a195cd40[SWECO Norge AS],{Engineering},{NO},1,1 139,410 https://www.gbif.org/publisher/2c542862-b9dd-40fc-8260-fb434997efa7[Stratos Consultoría Geológica],{Consulting},{CO},2,1 084,35 -https://www.gbif.org/publisher/f5db868f-e5bf-4208-bd9d-d4063ae1c825[TERRASOS],{Consulting},{CO},20,39 479,284 -https://www.gbif.org/publisher/728e3362-3063-4a43-a6cf-71d61b50025b[TotalEnergies],{Energy},{FR},56,45 783,171 -https://www.gbif.org/publisher/04a12c74-4b26-4994-a51a-8b733a57318b[Veolia Colombia],{Energy},{CO},2,672,13 +https://www.gbif.org/publisher/f5db868f-e5bf-4208-bd9d-d4063ae1c825[TERRASOS],{Consulting},{CO},21,39 794,285 +https://www.gbif.org/publisher/728e3362-3063-4a43-a6cf-71d61b50025b[TotalEnergies],{Energy},{FR},60,47 168,173 +https://www.gbif.org/publisher/04a12c74-4b26-4994-a51a-8b733a57318b[Veolia Colombia],{Energy},{CO},2,672,14 diff --git a/260-private-sector-totals.csv b/260-private-sector-totals.csv index 226aca2..769297c 100644 --- a/260-private-sector-totals.csv +++ b/260-private-sector-totals.csv @@ -1 +1 @@ -595,9 380 476,10 745 +601,9 616 894,10 821