Skip to content

Commit

Permalink
Merge pull request #6 from replicate/build-model-stats
Browse files Browse the repository at this point in the history
Add script to calculate daily model run counts over time
  • Loading branch information
zeke authored Jan 15, 2025
2 parents b05e0a2 + 9b82d34 commit c6cb3f1
Show file tree
Hide file tree
Showing 6 changed files with 408,942 additions and 117 deletions.
23 changes: 23 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

Metadata for all[^1] the public models on Replicate, bundled up into an npm package.

This package also includes [historical daily run counts](#stats) for each model, which are updated daily.

## Installation

Expand Down Expand Up @@ -37,6 +38,28 @@ const mostRun = chain(models).orderBy('run_count', 'desc').take(10).value()
console.log({mostRun})
```

## Stats

This package also includes historical daily run counts for each model, which are updated daily.

```js
import stats from 'all-the-public-replicate-models/stats'

console.log(stats["black-forest-labs/flux-schnell"].slice(-5))

/*
[
{ date: '2025-01-03', totalRuns: 176951005, dailyRuns: 1071498 },
{ date: '2025-01-04', totalRuns: 178025758, dailyRuns: 1074753 },
{ date: '2025-01-05', totalRuns: 179119496, dailyRuns: 1093738 },
{ date: '2025-01-06', totalRuns: 180272877, dailyRuns: 1153381 },
{ date: '2025-01-07', totalRuns: 181445133, dailyRuns: 1172256 }
]
*/
```

See [example.js](example.js) for a code snippet that uses the stats.

## Usage (as a CLI)

The CLI dumps the model metadata to standard output as a big JSON object:
Expand Down
8 changes: 7 additions & 1 deletion example.js
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,12 @@ const mostRunModels = chain(models)
.take(10)
.value()

console.log("Most run models:")
for (const model of mostRunModels) {
console.log(model.url)
}
}

import stats from './stats.mjs'

console.log("Stats for Flux Schnell:")
console.log(stats["black-forest-labs/flux-schnell"].slice(-5))
5 changes: 3 additions & 2 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,11 @@
"main": "index.mjs",
"exports": {
".": "./index.mjs",
"./lite": "./lite.mjs"
"./lite": "./lite.mjs",
"./stats": "./stats.mjs"
},
"scripts": {
"build": "node script/build.js",
"build": "node script/build.js && node script/stats.js",
"test": "mocha test.js"
},
"engines": {
Expand Down
205 changes: 91 additions & 114 deletions script/stats.js
100644 → 100755
Original file line number Diff line number Diff line change
@@ -1,119 +1,96 @@
import { promises as fs } from 'fs';
import oldModels from '../models.old.json' assert { type: "json" };
import newModels from '../models.json' assert { type: "json" };

async function compareAndGenerateMD() {
const newModelUrls = newModels.map(model => model.url);
const oldModelUrls = oldModels.map(model => model.url);

const addedModels = newModels.filter(model => !oldModelUrls.includes(model.url));
const removedModels = oldModels.filter(model => !newModelUrls.includes(model.url));

const activeModels = newModels.map(newModel => {
const oldModel = oldModels.find(m => m.url === newModel.url);
const runCountDiff = oldModel ? newModel.run_count - oldModel.run_count : newModel.run_count;
return { ...newModel, runCountDiff };
})
.filter(model => model.runCountDiff > 100)
.sort((a, b) => b.runCountDiff - a.runCountDiff);

const risingStars = newModels.map(model => {
const oldModel = oldModels.find(m => m.url === model.url);
const runCountDiff = oldModel ? model.run_count - oldModel.run_count : model.run_count;
const percentageIncrease = runCountDiff / model.run_count * 100;
return { ...model, runCountDiff, percentageIncrease };
})
.sort((a, b) => b.percentageIncrease - a.percentageIncrease)
.slice(0, 50);

let markdownContent = '# Model Stats\n';

// New Models
markdownContent += '## New Models\n';
if (addedModels.length > 0) {
for (let model of addedModels) {
markdownContent += `- ${model.url}\n`;
}
} else {
markdownContent += 'No new models today.\n';
}

// Removed Models
markdownContent += '\n## Removed Models\n';
if (removedModels.length > 0) {
for (let model of removedModels) {
markdownContent += `- ${model.url}\n`;
}
} else {
markdownContent += 'No models were removed today.\n';
}

// Rising Stars
markdownContent += '\n## Rising Stars\n';
if (risingStars.length > 0) {
markdownContent += '| Model | Description | Runs Today | Runs Total | % of Total |\n|-------|-------------|------------|------------|------------|\n';
for (let model of risingStars) {
const linkText = `${model.owner}/${model.name}`;
const percentageDisplay = `${model.percentageIncrease.toFixed(2)}%`;
markdownContent += `| [${linkText}](${model.url}) | ${model.description} | ${model.runCountDiff} | ${model.run_count} | ${percentageDisplay} |\n`;
}
} else {
markdownContent += 'No rising stars today.\n';
}

// Active Models
markdownContent += '\n## Active Models\n';
if (activeModels.length > 0) {
markdownContent += '| Model | Description | Runs in the last day |\n|-------|-------------|---------------------|\n';
for (let model of activeModels) {
const linkText = `${model.owner}/${model.name}`;
markdownContent += `| [${linkText}](${model.url}) | ${model.description} | ${model.runCountDiff} |\n`;
}
} else {
markdownContent += 'No active models today.\n';
}

// Top Model Authors
markdownContent += '\n## Top Model Authors by Run Count\n';
const modelsByOwner = newModels.reduce((acc, model) => {
if (!acc[model.owner]) {
acc[model.owner] = [];
}
acc[model.owner].push(model);
return acc;
}, {});

const topAuthors = Object.entries(modelsByOwner).map(([owner, models]) => ({
owner,
modelCount: models.length,
totalRuns: models.reduce((sum, model) => sum + model.run_count, 0),
})).sort((a, b) => b.totalRuns - a.totalRuns).slice(0, 30);


markdownContent += '| Author | Models | Total Runs |\n|--------|--------|------------|\n';
for (const author of topAuthors) {
const ownerUrl = `https://replicate.com/${author.owner}`;
markdownContent += `| [${author.owner}](${ownerUrl}) | ${author.modelCount} | ${author.totalRuns} |\n`;
}


// Top Model Authors by Model Count
markdownContent += '\n## Top Model Authors by Model Count\n';
const authorsByModelCount = Object.entries(modelsByOwner).map(([owner, models]) => ({
owner,
modelCount: models.length,
})).sort((a, b) => b.modelCount - a.modelCount).slice(0, 30);
#!/usr/bin/env node

import { exec } from 'node:child_process';
import { promisify } from 'node:util';
import fs from 'node:fs/promises';

const execAsync = promisify(exec);
const stats = {};

// Get the date 31 days ago in ISO format
const thirtyOneDaysAgo = new Date();
thirtyOneDaysAgo.setDate(thirtyOneDaysAgo.getDate() - 31);
const thirtyOneDaysAgoISO = thirtyOneDaysAgo.toISOString().split('T')[0];

// Get all commit SHAs using async exec
const { stdout: commitsOutput } = await execAsync(
`git log --reverse --since="${thirtyOneDaysAgoISO}" --format="%H"`
);
const commits = commitsOutput.trim().split('\n');

// Skip if no commits found
if (!commits[0]) {
console.log('No commits found in the last 31 days');
process.exit(0);
}

markdownContent += '| Author | Model Count |\n|--------|-------------|\n';
for (const author of authorsByModelCount) {
const ownerUrl = `https://replicate.com/${author.owner}`;
markdownContent += `| [${author.owner}](${ownerUrl}) | ${author.modelCount} |\n`;
// Loop through each commit, starting from the second commit (index 1)
for (const commitSha of commits) {
try {
// Get the commit message
const { stdout: commitMessage } = await execAsync(
`git log --format=%B -n 1 ${commitSha}`
);

// Bail if it's not a build artifacts commit
if (commitMessage.trim() !== 'Build artifacts') continue;

// Get the commit date
const { stdout: commitDate } = await execAsync(
`git log -n 1 --format=%ad --date=iso ${commitSha}`
);
const formattedDate = commitDate.trim().split(' ')[0];

// Instead of using git show, we'll create a temporary file
await execAsync(
`git show ${commitSha}:models-lite.json > temp-models-${commitSha}.json`
);

// Read the temporary file
const modelsJson = await fs.readFile(
`temp-models-${commitSha}.json`,
'utf-8'
);

// Clean up temporary file
await fs.unlink(`temp-models-${commitSha}.json`);

// console.log({ commitSha, date: formattedDate });

const models = JSON.parse(modelsJson);

// Process models data
for (const model of models) {
const nwo = `${model.owner}/${model.name}`;

if (!stats[nwo]) {
stats[nwo] = [];
}

const totalRuns = model.run_count;
const dailyRuns = stats[nwo].length > 0
? totalRuns - stats[nwo][stats[nwo].length - 1].totalRuns
: totalRuns;

const dailyStats = {
date: formattedDate,
totalRuns,
dailyRuns,
};
stats[nwo].push(dailyStats);
}

} catch (error) {
console.error(`Error processing commit ${commitSha}:`, error.message);
}
}

// Write to stats.md
await fs.writeFile('stats.md', markdownContent);
// Remove the first element from each model's stats array,
// because the dailyRuns is not accurate for the first day
for (const nwo in stats) {
stats[nwo] = stats[nwo].slice(1);
}

compareAndGenerateMD().catch(error => {
console.error('Error generating stats:', error);
});
// Write stats to disk
await fs.writeFile('stats.json', JSON.stringify(stats, null, 2));

Loading

0 comments on commit c6cb3f1

Please sign in to comment.