Skip to content

Commit

Permalink
Add results (#15)
Browse files Browse the repository at this point in the history
* Add res

* Add res

* Add res

* resize flores files

* reduce size of flores

* move files into folder

* move files into correct folder

* resize flores

---------

Co-authored-by: Kenneth Enevoldsen <[email protected]>
  • Loading branch information
Muennighoff and KennethEnevoldsen committed Aug 14, 2024
1 parent d4cd5e2 commit 319e81d
Show file tree
Hide file tree
Showing 6,294 changed files with 3,199,170 additions and 15 deletions.
The diff you're trying to view is too large. We only load the first 3000 changed files.
43 changes: 43 additions & 0 deletions reduce_large_json_files.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
import glob
import json
import os
from pathlib import Path

import mteb


def resize_flores():
    """
    Rewrite every FloresBitextMining.json found below this script's directory.

    Each file is loaded with ``mteb.MTEBResults.from_disk``, passed through
    ``validate_and_filter_scores()`` and written back to the same path; the
    intent is to keep only the relevant splits so the files get smaller.

    Files that fail to load, validate or save are reported on stdout and
    skipped, so one bad file does not abort the whole run.
    """
    paths = Path(__file__).parent.glob("**/FloresBitextMining.json")

    for p in paths:
        try:
            res = mteb.MTEBResults.from_disk(p)
            # NOTE(review): this relies on validate_and_filter_scores()
            # filtering `res` in place -- confirm for the pinned mteb version.
            res.validate_and_filter_scores()
            res.to_disk(p)
        except Exception as exc:
            # Best-effort pass: keep going, but say which file was skipped and
            # why instead of hiding the failure entirely.
            print(f"Skipping {p}: {type(exc).__name__}: {exc}")


def remove_spaces(pattern="results/*/*/*.json", min_size_bytes=9.5 * 1024 * 1024):
    """
    Rewrite large JSON files in their most compact form.

    Every file matching ``pattern`` whose size is at least ``min_size_bytes``
    is parsed and written back in place without indentation and without the
    optional spaces after ``,`` and ``:``. Smaller files are left untouched.

    Args:
        pattern: glob pattern (relative to the current working directory unless
            absolute) selecting the candidate files.
        min_size_bytes: files at least this large are rewritten; the default
            is 9.5 MiB.
    """
    for file in glob.glob(pattern):
        if os.path.getsize(file) < min_size_bytes:
            continue

        print(f"Resizing {file} to have no indentations")
        with open(file, "r", encoding="utf-8") as f:
            data = json.load(f)

        # Serialize before opening the file for writing: if serialization
        # fails, the original content is not truncated.
        # indent=None alone still emits ", " and ": "; the compact separators
        # are what actually remove the remaining spaces.
        compact = json.dumps(data, indent=None, separators=(",", ":"))

        with open(file, "w", encoding="utf-8") as f:
            f.write(compact)


if __name__ == "__main__":
    # Script entry point: first filter the Flores result files, then compact
    # whatever JSON files are still large. The order is kept as-is.
    for step in (resize_flores, remove_spaces):
        step()
15 changes: 0 additions & 15 deletions remove_spaces_from_large_json_files.py

This file was deleted.

Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
{
"dataset_revision": "b44c3b011063adb25877c13823db83bb193913c4",
"evaluation_time": 17.481793880462646,
"kg_co2_emissions": 0.00635233445280417,
"mteb_version": "1.12.75",
"scores": {
"test": [
{
"cosine_pearson": NaN,
"cosine_spearman": NaN,
"euclidean_pearson": NaN,
"euclidean_spearman": NaN,
"hf_subset": "default",
"languages": [
"cmn-Hans"
],
"main_score": NaN,
"manhattan_pearson": NaN,
"manhattan_spearman": NaN,
"pearson": NaN,
"spearman": NaN
}
],
"validation": [
{
"cosine_pearson": 0.3460463903690384,
"cosine_spearman": 0.35586367340629843,
"euclidean_pearson": 0.34694863917607155,
"euclidean_spearman": 0.35586367343285613,
"hf_subset": "default",
"languages": [
"cmn-Hans"
],
"main_score": 0.35586367340629843,
"manhattan_pearson": 0.3450887143231004,
"manhattan_spearman": 0.3538154481140048,
"pearson": 0.3460463903690384,
"spearman": 0.35586367340629843
}
]
},
"task_name": "AFQMC"
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,158 @@
{
"dataset_revision": "4106e6bcc72e0698d714ea8b101355e3e238431a",
"evaluation_time": 9.759849071502686,
"kg_co2_emissions": 0.00204240764621667,
"mteb_version": "1.12.75",
"scores": {
"test": [
{
"hf_subset": "default",
"languages": [
"eng-Latn"
],
"main_score": 0.35292,
"map_at_1": 0.11428,
"map_at_10": 0.26352,
"map_at_100": 0.31071,
"map_at_1000": 0.3141,
"map_at_20": 0.28666,
"map_at_3": 0.20542,
"map_at_5": 0.24204,
"mrr_at_1": 0.34,
"mrr_at_10": 0.4388809523809523,
"mrr_at_100": 0.4488417903511672,
"mrr_at_1000": 0.4493337330455302,
"mrr_at_20": 0.4434875957375957,
"mrr_at_3": 0.4,
"mrr_at_5": 0.42199999999999993,
"nauc_map_at_1000_diff1": 0.08452585886817028,
"nauc_map_at_1000_max": 0.16982583119273284,
"nauc_map_at_1000_std": -0.029692049599488294,
"nauc_map_at_100_diff1": 0.0855831125113519,
"nauc_map_at_100_max": 0.1742596859553468,
"nauc_map_at_100_std": -0.028119674358173505,
"nauc_map_at_10_diff1": 0.09564835470275938,
"nauc_map_at_10_max": 0.1641637521112375,
"nauc_map_at_10_std": -0.039465910929794304,
"nauc_map_at_1_diff1": -0.0074679114742572,
"nauc_map_at_1_max": 0.15197995190627844,
"nauc_map_at_1_std": 0.25610139108570495,
"nauc_map_at_20_diff1": 0.08800078509535833,
"nauc_map_at_20_max": 0.166014287647395,
"nauc_map_at_20_std": -0.029668817953378907,
"nauc_map_at_3_diff1": 0.10996705130720544,
"nauc_map_at_3_max": 0.16003330818586703,
"nauc_map_at_3_std": -0.005324148442651372,
"nauc_map_at_5_diff1": 0.09520460466700302,
"nauc_map_at_5_max": 0.1787019018933924,
"nauc_map_at_5_std": -0.03712088444056514,
"nauc_mrr_at_1000_diff1": 0.13917868210553688,
"nauc_mrr_at_1000_max": 0.19835233595498217,
"nauc_mrr_at_1000_std": -0.12461051835152313,
"nauc_mrr_at_100_diff1": 0.1378429450943103,
"nauc_mrr_at_100_max": 0.1987168081766317,
"nauc_mrr_at_100_std": -0.12459915733770997,
"nauc_mrr_at_10_diff1": 0.14027517897255265,
"nauc_mrr_at_10_max": 0.19774188847331753,
"nauc_mrr_at_10_std": -0.11743787366999998,
"nauc_mrr_at_1_diff1": 0.20405946103500341,
"nauc_mrr_at_1_max": 0.2460584541190809,
"nauc_mrr_at_1_std": -0.06627096637430789,
"nauc_mrr_at_20_diff1": 0.14107174000269407,
"nauc_mrr_at_20_max": 0.1958367723173387,
"nauc_mrr_at_20_std": -0.12697328972550517,
"nauc_mrr_at_3_diff1": 0.15092140563838655,
"nauc_mrr_at_3_max": 0.20096318775564032,
"nauc_mrr_at_3_std": -0.13132339358754466,
"nauc_mrr_at_5_diff1": 0.14473392814073807,
"nauc_mrr_at_5_max": 0.1957821035954736,
"nauc_mrr_at_5_std": -0.13971832273995155,
"nauc_ndcg_at_1000_diff1": 0.08119000930033833,
"nauc_ndcg_at_1000_max": 0.18051033217371604,
"nauc_ndcg_at_1000_std": -0.0645469376506639,
"nauc_ndcg_at_100_diff1": 0.06620184711163733,
"nauc_ndcg_at_100_max": 0.2054641361340766,
"nauc_ndcg_at_100_std": -0.05234576931588993,
"nauc_ndcg_at_10_diff1": 0.07899603042075483,
"nauc_ndcg_at_10_max": 0.1493529002966705,
"nauc_ndcg_at_10_std": -0.07917902580385117,
"nauc_ndcg_at_1_diff1": 0.20405946103500341,
"nauc_ndcg_at_1_max": 0.2460584541190809,
"nauc_ndcg_at_1_std": -0.06627096637430789,
"nauc_ndcg_at_20_diff1": 0.07689663383842478,
"nauc_ndcg_at_20_max": 0.158848648052617,
"nauc_ndcg_at_20_std": -0.06549632158447359,
"nauc_ndcg_at_3_diff1": 0.1699044661620425,
"nauc_ndcg_at_3_max": 0.21746865302594978,
"nauc_ndcg_at_3_std": -0.09822077141367175,
"nauc_ndcg_at_5_diff1": 0.09503538473125193,
"nauc_ndcg_at_5_max": 0.1876180479262501,
"nauc_ndcg_at_5_std": -0.09065010752198567,
"nauc_precision_at_1000_diff1": -0.012933222614817127,
"nauc_precision_at_1000_max": -0.02182544351842821,
"nauc_precision_at_1000_std": -0.03633539641823346,
"nauc_precision_at_100_diff1": 0.011964412756940292,
"nauc_precision_at_100_max": 0.05215688374476966,
"nauc_precision_at_100_std": -0.018306163508541088,
"nauc_precision_at_10_diff1": 0.10196021419398599,
"nauc_precision_at_10_max": 0.19815366722395872,
"nauc_precision_at_10_std": -0.15117152479949625,
"nauc_precision_at_1_diff1": 0.20405946103500341,
"nauc_precision_at_1_max": 0.2460584541190809,
"nauc_precision_at_1_std": -0.06627096637430789,
"nauc_precision_at_20_diff1": 0.06205234579952439,
"nauc_precision_at_20_max": 0.10319687633457403,
"nauc_precision_at_20_std": -0.07819535110731454,
"nauc_precision_at_3_diff1": 0.2573609981973061,
"nauc_precision_at_3_max": 0.28629835165482526,
"nauc_precision_at_3_std": -0.21805521190484614,
"nauc_precision_at_5_diff1": 0.14711738871520008,
"nauc_precision_at_5_max": 0.2710566473127884,
"nauc_precision_at_5_std": -0.205662870329016,
"nauc_recall_at_1000_diff1": NaN,
"nauc_recall_at_1000_max": NaN,
"nauc_recall_at_1000_std": NaN,
"nauc_recall_at_100_diff1": -0.26794857495091023,
"nauc_recall_at_100_max": 0.38065398402884026,
"nauc_recall_at_100_std": 0.0610824534727243,
"nauc_recall_at_10_diff1": -0.019345248858768805,
"nauc_recall_at_10_max": 0.0406002146388072,
"nauc_recall_at_10_std": -0.08173226542876458,
"nauc_recall_at_1_diff1": -0.0074679114742572,
"nauc_recall_at_1_max": 0.15197995190627844,
"nauc_recall_at_1_std": 0.25610139108570495,
"nauc_recall_at_20_diff1": -0.02783514941645784,
"nauc_recall_at_20_max": 0.060498916007381304,
"nauc_recall_at_20_std": -0.04638916123390442,
"nauc_recall_at_3_diff1": 0.04939654766343653,
"nauc_recall_at_3_max": 0.08324177698491168,
"nauc_recall_at_3_std": -0.06935823183507292,
"nauc_recall_at_5_diff1": 0.019803670904629492,
"nauc_recall_at_5_max": 0.08012562758427709,
"nauc_recall_at_5_std": -0.12195322908572236,
"ndcg_at_1": 0.34,
"ndcg_at_10": 0.35292,
"ndcg_at_100": 0.49244,
"ndcg_at_1000": 0.51718,
"ndcg_at_20": 0.40089,
"ndcg_at_3": 0.32571,
"ndcg_at_5": 0.33696,
"precision_at_1": 0.34,
"precision_at_10": 0.138,
"precision_at_100": 0.0342,
"precision_at_1000": 0.0039,
"precision_at_20": 0.098,
"precision_at_3": 0.26,
"precision_at_5": 0.216,
"recall_at_1": 0.11428,
"recall_at_10": 0.41708,
"recall_at_100": 0.87547,
"recall_at_1000": 1.0,
"recall_at_20": 0.54846,
"recall_at_3": 0.24871,
"recall_at_5": 0.3371
}
]
},
"task_name": "AILACasedocs"
}
Loading

0 comments on commit 319e81d

Please sign in to comment.