-
Notifications
You must be signed in to change notification settings - Fork 4
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #6 from replicate/build-model-stats
Add script to calculate daily model run counts over time
- Loading branch information
Showing
6 changed files
with
408,942 additions
and
117 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,119 +1,96 @@ | ||
import { promises as fs } from 'fs'; | ||
import oldModels from '../models.old.json' assert { type: "json" }; | ||
import newModels from '../models.json' assert { type: "json" }; | ||
|
||
async function compareAndGenerateMD() { | ||
const newModelUrls = newModels.map(model => model.url); | ||
const oldModelUrls = oldModels.map(model => model.url); | ||
|
||
const addedModels = newModels.filter(model => !oldModelUrls.includes(model.url)); | ||
const removedModels = oldModels.filter(model => !newModelUrls.includes(model.url)); | ||
|
||
const activeModels = newModels.map(newModel => { | ||
const oldModel = oldModels.find(m => m.url === newModel.url); | ||
const runCountDiff = oldModel ? newModel.run_count - oldModel.run_count : newModel.run_count; | ||
return { ...newModel, runCountDiff }; | ||
}) | ||
.filter(model => model.runCountDiff > 100) | ||
.sort((a, b) => b.runCountDiff - a.runCountDiff); | ||
|
||
const risingStars = newModels.map(model => { | ||
const oldModel = oldModels.find(m => m.url === model.url); | ||
const runCountDiff = oldModel ? model.run_count - oldModel.run_count : model.run_count; | ||
const percentageIncrease = runCountDiff / model.run_count * 100; | ||
return { ...model, runCountDiff, percentageIncrease }; | ||
}) | ||
.sort((a, b) => b.percentageIncrease - a.percentageIncrease) | ||
.slice(0, 50); | ||
|
||
let markdownContent = '# Model Stats\n'; | ||
|
||
// New Models | ||
markdownContent += '## New Models\n'; | ||
if (addedModels.length > 0) { | ||
for (let model of addedModels) { | ||
markdownContent += `- ${model.url}\n`; | ||
} | ||
} else { | ||
markdownContent += 'No new models today.\n'; | ||
} | ||
|
||
// Removed Models | ||
markdownContent += '\n## Removed Models\n'; | ||
if (removedModels.length > 0) { | ||
for (let model of removedModels) { | ||
markdownContent += `- ${model.url}\n`; | ||
} | ||
} else { | ||
markdownContent += 'No models were removed today.\n'; | ||
} | ||
|
||
// Rising Stars | ||
markdownContent += '\n## Rising Stars\n'; | ||
if (risingStars.length > 0) { | ||
markdownContent += '| Model | Description | Runs Today | Runs Total | % of Total |\n|-------|-------------|------------|------------|------------|\n'; | ||
for (let model of risingStars) { | ||
const linkText = `${model.owner}/${model.name}`; | ||
const percentageDisplay = `${model.percentageIncrease.toFixed(2)}%`; | ||
markdownContent += `| [${linkText}](${model.url}) | ${model.description} | ${model.runCountDiff} | ${model.run_count} | ${percentageDisplay} |\n`; | ||
} | ||
} else { | ||
markdownContent += 'No rising stars today.\n'; | ||
} | ||
|
||
// Active Models | ||
markdownContent += '\n## Active Models\n'; | ||
if (activeModels.length > 0) { | ||
markdownContent += '| Model | Description | Runs in the last day |\n|-------|-------------|---------------------|\n'; | ||
for (let model of activeModels) { | ||
const linkText = `${model.owner}/${model.name}`; | ||
markdownContent += `| [${linkText}](${model.url}) | ${model.description} | ${model.runCountDiff} |\n`; | ||
} | ||
} else { | ||
markdownContent += 'No active models today.\n'; | ||
} | ||
|
||
// Top Model Authors | ||
markdownContent += '\n## Top Model Authors by Run Count\n'; | ||
const modelsByOwner = newModels.reduce((acc, model) => { | ||
if (!acc[model.owner]) { | ||
acc[model.owner] = []; | ||
} | ||
acc[model.owner].push(model); | ||
return acc; | ||
}, {}); | ||
|
||
const topAuthors = Object.entries(modelsByOwner).map(([owner, models]) => ({ | ||
owner, | ||
modelCount: models.length, | ||
totalRuns: models.reduce((sum, model) => sum + model.run_count, 0), | ||
})).sort((a, b) => b.totalRuns - a.totalRuns).slice(0, 30); | ||
|
||
|
||
markdownContent += '| Author | Models | Total Runs |\n|--------|--------|------------|\n'; | ||
for (const author of topAuthors) { | ||
const ownerUrl = `https://replicate.com/${author.owner}`; | ||
markdownContent += `| [${author.owner}](${ownerUrl}) | ${author.modelCount} | ${author.totalRuns} |\n`; | ||
} | ||
|
||
|
||
// Top Model Authors by Model Count | ||
markdownContent += '\n## Top Model Authors by Model Count\n'; | ||
const authorsByModelCount = Object.entries(modelsByOwner).map(([owner, models]) => ({ | ||
owner, | ||
modelCount: models.length, | ||
})).sort((a, b) => b.modelCount - a.modelCount).slice(0, 30); | ||
#!/usr/bin/env node | ||
|
||
import { exec } from 'node:child_process'; | ||
import { promisify } from 'node:util'; | ||
import fs from 'node:fs/promises'; | ||
|
||
const execAsync = promisify(exec); | ||
const stats = {}; | ||
|
||
// Get the date 31 days ago in ISO format | ||
const thirtyOneDaysAgo = new Date(); | ||
thirtyOneDaysAgo.setDate(thirtyOneDaysAgo.getDate() - 31); | ||
const thirtyOneDaysAgoISO = thirtyOneDaysAgo.toISOString().split('T')[0]; | ||
|
||
// Get all commit SHAs using async exec | ||
const { stdout: commitsOutput } = await execAsync( | ||
`git log --reverse --since="${thirtyOneDaysAgoISO}" --format="%H"` | ||
); | ||
const commits = commitsOutput.trim().split('\n'); | ||
|
||
// Skip if no commits found | ||
if (!commits[0]) { | ||
console.log('No commits found in the last 31 days'); | ||
process.exit(0); | ||
} | ||
|
||
markdownContent += '| Author | Model Count |\n|--------|-------------|\n'; | ||
for (const author of authorsByModelCount) { | ||
const ownerUrl = `https://replicate.com/${author.owner}`; | ||
markdownContent += `| [${author.owner}](${ownerUrl}) | ${author.modelCount} |\n`; | ||
// Loop through each commit, starting from the second commit (index 1) | ||
for (const commitSha of commits) { | ||
try { | ||
// Get the commit message | ||
const { stdout: commitMessage } = await execAsync( | ||
`git log --format=%B -n 1 ${commitSha}` | ||
); | ||
|
||
// Bail if it's not a build artifacts commit | ||
if (commitMessage.trim() !== 'Build artifacts') continue; | ||
|
||
// Get the commit date | ||
const { stdout: commitDate } = await execAsync( | ||
`git log -n 1 --format=%ad --date=iso ${commitSha}` | ||
); | ||
const formattedDate = commitDate.trim().split(' ')[0]; | ||
|
||
// Instead of using git show, we'll create a temporary file | ||
await execAsync( | ||
`git show ${commitSha}:models-lite.json > temp-models-${commitSha}.json` | ||
); | ||
|
||
// Read the temporary file | ||
const modelsJson = await fs.readFile( | ||
`temp-models-${commitSha}.json`, | ||
'utf-8' | ||
); | ||
|
||
// Clean up temporary file | ||
await fs.unlink(`temp-models-${commitSha}.json`); | ||
|
||
// console.log({ commitSha, date: formattedDate }); | ||
|
||
const models = JSON.parse(modelsJson); | ||
|
||
// Process models data | ||
for (const model of models) { | ||
const nwo = `${model.owner}/${model.name}`; | ||
|
||
if (!stats[nwo]) { | ||
stats[nwo] = []; | ||
} | ||
|
||
const totalRuns = model.run_count; | ||
const dailyRuns = stats[nwo].length > 0 | ||
? totalRuns - stats[nwo][stats[nwo].length - 1].totalRuns | ||
: totalRuns; | ||
|
||
const dailyStats = { | ||
date: formattedDate, | ||
totalRuns, | ||
dailyRuns, | ||
}; | ||
stats[nwo].push(dailyStats); | ||
} | ||
|
||
} catch (error) { | ||
console.error(`Error processing commit ${commitSha}:`, error.message); | ||
} | ||
} | ||
|
||
// Write to stats.md | ||
await fs.writeFile('stats.md', markdownContent); | ||
// Remove the first element from each model's stats array, | ||
// because the dailyRuns is not accurate for the first day | ||
for (const nwo in stats) { | ||
stats[nwo] = stats[nwo].slice(1); | ||
} | ||
|
||
compareAndGenerateMD().catch(error => { | ||
console.error('Error generating stats:', error); | ||
}); | ||
// Write stats to disk | ||
await fs.writeFile('stats.json', JSON.stringify(stats, null, 2)); | ||
|
Oops, something went wrong.