{"payload":{"pageCount":1,"repositories":[{"type":"Public","name":"FlexGen","owner":"FMInference","isFork":false,"description":"Running large language models on a single GPU for throughput-oriented scenarios.","allTopics":["machine-learning","offloading","high-throughput","opt","gpt-3","large-language-models","deep-learning"],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":6,"issueCount":50,"starsCount":9143,"forksCount":540,"license":"Apache License 2.0","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-09-27T07:25:47.211Z"}},{"type":"Public","name":"H2O","owner":"FMInference","isFork":false,"description":"[NeurIPS'23] H2O: Heavy-Hitter Oracle for Efficient Generative Inference of Large Language Models.","allTopics":["sparsity","high-throughput","heavy-hitters","kv-cache","gpt-3","large-language-models"],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":1,"issueCount":30,"starsCount":366,"forksCount":37,"license":null,"participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-08-01T19:41:25.065Z"}},{"type":"Public","name":"DejaVu","owner":"FMInference","isFork":false,"description":"","allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":1,"issueCount":25,"starsCount":272,"forksCount":33,"license":null,"participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-04-02T21:51:45.539Z"}}],"repositoryCount":3,"userInfo":null,"searchable":true,"definitions":[],"typeFilters":[{"id":"all","text":"All"},{"id":"public","text":"Public"},{"id":"source","text":"Sources"},{"id":"fork","text":"Forks"},{"id":"archived","text":"Archived"},{"id":"template","text":"Templates"}],"compactMode":false},"title":"FMInference repositories"}