import { PROVIDERS_MAP } from './providers';

/** A single model or dataset entry, tagged with the provider it was fetched for. */
export interface ModelData {
  id: string;
  name: string;
  createdAt: string;
  likes: number;
  downloads?: number;
  isDataset?: boolean;
  provider: string;
}

/** One heatmap cell: a day, how many items were created on it, and its intensity level. */
export interface Activity {
  date: string; // YYYY-MM-DD
  count: number;
  level: number; // 0 (no activity) up to 4
}

/** Per-provider daily activity, keyed by provider name. */
export interface CalendarData {
  [key: string]: Activity[];
}

export interface MonthlyActivity {
  date: string; // YYYY-MM format
  count: number;
  provider: string;
  isDataset: boolean | null; // null means it includes both
  name?: string;
}

export interface DetailedModelData {
  id: string;
  name: string;
  createdAt: string;
  likes: number;
  downloads: number;
  monthKey: string; // YYYY-MM
  provider: string;
  sortKey: string; // `${monthKey}-${name}`
  isDataset: boolean;
}

/**
 * Fields this module reads from each item returned by the Hub list endpoints.
 * NOTE(review): the shape is assumed from how the items are used below; the
 * payload is only checked to be an array, individual items are not validated.
 */
interface HubApiItem {
  id: string;
  createdAt: string;
  likes: number;
  downloads?: number;
}

type ItemKind = 'model' | 'dataset';

const MAX_LEVEL = 4;

/** Maps a count to a heatmap level: 0 for no activity, otherwise ceil(count / step) capped at MAX_LEVEL. */
const toLevel = (count: number, step: number): number =>
  count === 0 ? 0 : Math.min(MAX_LEVEL, Math.ceil(count / step));

/**
 * Derives the YYYY-MM key of a timestamp in UTC, so that monthly buckets agree
 * with the daily buckets (which use the date part of the ISO string).
 */
const monthKeyOf = (createdAt: string): string => {
  const date = new Date(createdAt);
  return `${date.getUTCFullYear()}-${String(date.getUTCMonth() + 1).padStart(2, '0')}`;
};

/**
 * Lists every UTC day (YYYY-MM-DD) from the first day of the month eleven
 * months before `now` up to and including `now`'s UTC date.
 */
const listCalendarDates = (now: Date): string[] => {
  // Date.UTC normalises a negative month index into the previous year, and
  // pinning the day to 1 up front avoids the end-of-month overflow that
  // setMonth() suffers from when the current day is the 29th-31st.
  const startMs = Date.UTC(now.getUTCFullYear(), now.getUTCMonth() - 11, 1);
  const endMs = Date.UTC(now.getUTCFullYear(), now.getUTCMonth(), now.getUTCDate());

  const dates: string[] = [];
  const cursor = new Date(startMs);
  while (cursor.getTime() <= endMs) {
    dates.push(cursor.toISOString().split('T')[0]);
    // Stepping in UTC keeps the sequence free of DST-induced gaps or repeats.
    cursor.setUTCDate(cursor.getUTCDate() + 1);
  }
  return dates;
};

/**
 * Generates per-provider daily calendar data from model data.
 *
 * Each item is attributed to the first provider in PROVIDERS_MAP whose
 * `authors` list contains the organisation prefix of `item.id` (the part
 * before the first '/'); items matching no provider are ignored. Every
 * provider in PROVIDERS_MAP gets one entry per day of the calendar range,
 * with zero counts filled in.
 *
 * @param modelData Items to count; a non-array value is logged and treated as empty.
 * @returns Daily activity for every provider in PROVIDERS_MAP.
 */
export const generateCalendarData = (modelData: ModelData[]): CalendarData => {
  let items: ModelData[] = modelData;
  if (!Array.isArray(modelData)) {
    console.error('Model data is not an array:', modelData);
    items = [];
  }

  // Author -> provider lookup; the first provider listing an author wins.
  const providerByAuthor = new Map<string, string>();
  for (const [provider, info] of Object.entries(PROVIDERS_MAP)) {
    for (const author of info.authors) {
      if (!providerByAuthor.has(author)) {
        providerByAuthor.set(author, provider);
      }
    }
  }

  // provider -> (YYYY-MM-DD -> number of items created that day)
  const countsByProvider = new Map<string, Map<string, number>>();
  for (const item of items) {
    if (typeof item.id !== 'string' || typeof item.createdAt !== 'string') {
      continue; // Cannot attribute or date the item.
    }
    const [org] = item.id.split('/');
    const provider = providerByAuthor.get(org);
    if (provider === undefined) {
      continue;
    }
    const day = item.createdAt.split('T')[0];
    let dayCounts = countsByProvider.get(provider);
    if (!dayCounts) {
      dayCounts = new Map<string, number>();
      countsByProvider.set(provider, dayCounts);
    }
    dayCounts.set(day, (dayCounts.get(day) ?? 0) + 1);
  }

  // The date range is identical for every provider, so build it once.
  const dates = listCalendarDates(new Date());

  const data: CalendarData = {};
  for (const provider of Object.keys(PROVIDERS_MAP)) {
    const dayCounts = countsByProvider.get(provider);
    data[provider] = dates.map(date => {
      const count = dayCounts?.get(date) ?? 0;
      return { date, count, level: toLevel(count, 2) };
    });
  }
  return data;
};

/**
 * Aggregates calendar data from all providers into a single heatmap.
 *
 * The date range of the result is taken from the first provider's series;
 * counts for each of those dates are summed across all providers.
 *
 * @param calendarData Per-provider daily activity.
 * @returns One Activity per date of the first provider's series, or [] when there are no providers.
 */
export const aggregateCalendarData = (calendarData: CalendarData): Activity[] => {
  const allSeries = Object.values(calendarData);
  const firstProviderData = allSeries[0];
  if (firstProviderData === undefined) {
    return [];
  }

  const totals = new Map<string, number>();
  for (const series of allSeries) {
    for (const { date, count } of series) {
      totals.set(date, (totals.get(date) ?? 0) + count);
    }
  }

  return firstProviderData.map(({ date }) => {
    const count = totals.get(date) ?? 0;
    return { date, count, level: toLevel(count, 3) };
  });
};

/**
 * Counts items per month, provider and type (model vs. dataset).
 *
 * Items without a provider are grouped under 'unknown'. The `name` of each
 * bucket is the name of the first item that fell into it.
 *
 * @param modelData Items to bucket.
 * @returns One entry per (month, provider, type) combination, sorted by month.
 */
export const generateMonthlyData = (modelData: ModelData[]): MonthlyActivity[] => {
  // month -> provider -> type -> bucket. The nesting keeps buckets of the same
  // month grouped by provider in first-seen order once flattened.
  const byMonth = new Map<string, Map<string, Map<ItemKind, MonthlyActivity>>>();

  for (const model of modelData) {
    const monthKey = monthKeyOf(model.createdAt);
    const provider = model.provider || 'unknown';
    const kind: ItemKind = model.isDataset ? 'dataset' : 'model';

    let byProvider = byMonth.get(monthKey);
    if (!byProvider) {
      byProvider = new Map();
      byMonth.set(monthKey, byProvider);
    }

    let byKind = byProvider.get(provider);
    if (!byKind) {
      byKind = new Map();
      byProvider.set(provider, byKind);
    }

    let bucket = byKind.get(kind);
    if (!bucket) {
      bucket = {
        date: monthKey,
        count: 0,
        provider,
        isDataset: model.isDataset ?? false, // Default to "model" when the flag is absent
        name: model.name,
      };
      byKind.set(kind, bucket);
    }
    bucket.count++;
  }

  // Flatten the nested structure
  const flattened: MonthlyActivity[] = [];
  for (const byProvider of byMonth.values()) {
    for (const byKind of byProvider.values()) {
      flattened.push(...byKind.values());
    }
  }
  return flattened.sort((a, b) => a.date.localeCompare(b.date));
};

/** Resolves after `ms` milliseconds. */
const delay = (ms: number): Promise<void> =>
  new Promise(resolve => setTimeout(resolve, ms));

/**
 * Fetches `url`, retrying on HTTP 429 responses and on network errors.
 *
 * @param url Address to fetch.
 * @param retries Maximum number of attempts.
 * @param delayMs Pause between attempts, in milliseconds.
 * @returns The first response whose status is not 429 (it may still be a non-2xx response).
 * @throws The last network error if the final attempt fails, or
 *         Error('Max retries reached') if every attempt was rate limited.
 */
async function fetchWithRetry(url: string, retries = 3, delayMs = 1000): Promise<Response> {
  for (let attempt = 1; attempt <= retries; attempt++) {
    const isLastAttempt = attempt === retries;
    try {
      const response = await fetch(url);
      if (response.status !== 429) {
        return response;
      }
      if (isLastAttempt) {
        break; // No point in waiting when no further attempt will follow.
      }
      // Rate limit exceeded
      console.log(`Rate limit exceeded, waiting ${delayMs}ms before retry ${attempt}/${retries}`);
    } catch (error) {
      if (isLastAttempt) throw error;
      console.log(`Fetch failed, retrying (${attempt}/${retries})...`);
    }
    await delay(delayMs);
  }
  throw new Error('Max retries reached');
}

/**
 * Fetches the list of models or datasets published by `author`.
 *
 * @throws If the request ultimately fails, returns a non-2xx status, or the
 *         body is not a JSON array.
 */
const fetchHubItems = async (
  endpoint: 'models' | 'datasets',
  author: string
): Promise<HubApiItem[]> => {
  const response = await fetchWithRetry(
    `https://huggingface.co/api/${endpoint}?author=${encodeURIComponent(author)}&sort=likes&direction=-1&limit=10000`,
    3,
    2000
  );
  if (!response.ok) {
    throw new Error(`Request for ${endpoint} of ${author} failed with status ${response.status}`);
  }
  const payload: unknown = await response.json();
  if (!Array.isArray(payload)) {
    throw new Error(`Unexpected ${endpoint} response for ${author}: expected an array`);
  }
  return payload as HubApiItem[];
};

/** Converts a raw Hub item into the ModelData shape used by this module. */
const toModelData = (item: HubApiItem, provider: string, isDataset: boolean): ModelData => ({
  id: item.id,
  name: item.id,
  createdAt: item.createdAt,
  likes: item.likes,
  downloads: item.downloads,
  isDataset,
  provider,
});

/**
 * Fetches all models and datasets for every author of every provider in
 * PROVIDERS_MAP.
 *
 * Requests are issued sequentially with a pause between authors to avoid rate
 * limiting. A failure for one author is logged and does not abort the run.
 *
 * @returns The fetched items with duplicates removed. Items are considered
 *          duplicates when they share both id and type, so a model and a
 *          dataset that happen to have the same id are both kept; for true
 *          duplicates the entry fetched last wins.
 */
export const fetchAllModelData = async (): Promise<ModelData[]> => {
  const allData: ModelData[] = [];

  for (const [provider, info] of Object.entries(PROVIDERS_MAP)) {
    console.log(`Fetching data for provider: ${provider}`);

    for (const author of info.authors) {
      console.log(`  Fetching data for author: ${author}`);
      try {
        const modelData = await fetchHubItems('models', author);
        const datasetData = await fetchHubItems('datasets', author);

        // Combine and process the data
        const combinedData = [
          ...modelData.map(item => toModelData(item, provider, false)),
          ...datasetData.map(item => toModelData(item, provider, true)),
        ];
        allData.push(...combinedData);
        console.log(`  Fetched ${combinedData.length} items (${modelData.length} models, ${datasetData.length} datasets) for ${author}`);
      } catch (error) {
        console.error(`Error fetching data for ${provider}/${author}:`, error);
      }

      // Add a delay between author requests to avoid rate limiting
      await delay(1000);
    }
  }

  // Remove duplicates. The key includes the item type because models and
  // datasets can share the same id without being the same item.
  const uniqueData = Array.from(
    new Map(
      allData.map((item): [string, ModelData] => [
        `${item.isDataset ? 'dataset' : 'model'}:${item.id}`,
        item,
      ])
    ).values()
  );

  console.log(`Total unique items fetched: ${uniqueData.length}`);
  return uniqueData;
};

/**
 * Normalises items for detailed display: fills in defaults for missing
 * fields and attaches a month key and a sort key.
 *
 * @param models Items to normalise.
 * @returns One DetailedModelData per input item, in the same order.
 */
export function processDetailedModelData(models: ModelData[]): DetailedModelData[] {
  return models.map(model => {
    const monthKey = monthKeyOf(model.createdAt);
    // Resolve the display name once so that sortKey and name always agree.
    const name = model.name || model.id;

    return {
      id: model.id,
      name,
      createdAt: model.createdAt,
      likes: model.likes ?? 0,
      downloads: model.downloads ?? 0, // Set downloads to 0 if it's undefined
      monthKey,
      provider: model.provider || 'unknown',
      sortKey: `${monthKey}-${name}`,
      isDataset: model.isDataset ?? false,
    };
  });
}

/**
 * Helper function to get total monthly data across all providers.
 *
 * For every month present in the input, three entries with provider 'Total'
 * are produced, in this order: combined (isDataset: null), models
 * (isDataset: false) and datasets (isDataset: true).
 *
 * Note: input entries whose isDataset is null are counted as models.
 *
 * @param monthlyData Per-provider monthly activity.
 * @returns Totals per month, sorted by month.
 */
export const getTotalMonthlyData = (monthlyData: MonthlyActivity[]): MonthlyActivity[] => {
  const totalByMonth = new Map<string, { model: number; dataset: number; all: number }>();

  // Aggregate counts by month and type
  for (const { date, count, isDataset } of monthlyData) {
    let totals = totalByMonth.get(date);
    if (!totals) {
      totals = { model: 0, dataset: 0, all: 0 };
      totalByMonth.set(date, totals);
    }
    totals[isDataset ? 'dataset' : 'model'] += count;
    totals.all += count; // Track combined count
  }

  // Convert to MonthlyActivity array
  const result: MonthlyActivity[] = [];
  for (const [date, counts] of totalByMonth) {
    result.push(
      { date, count: counts.all, provider: 'Total', isDataset: null }, // null means it includes both
      { date, count: counts.model, provider: 'Total', isDataset: false },
      { date, count: counts.dataset, provider: 'Total', isDataset: true }
    );
  }
  return result.sort((a, b) => a.date.localeCompare(b.date));
};

/** Quotes a CSV field when it contains a comma, a double quote or a line break. */
const escapeCsvField = (field: string): string =>
  /[",\r\n]/.test(field) ? `"${field.replace(/"/g, '""')}"` : field;

/**
 * Convert monthly activity data to CSV format.
 *
 * The result has a 'Date' column followed by one column per provider (in
 * first-seen order) and one row per date, sorted by date. Missing
 * date/provider combinations are written as 0.
 *
 * Note: when several entries share the same date and provider (for example
 * separate model and dataset entries), the last one wins; counts are not
 * summed. Pre-filter the input if a specific type is wanted.
 *
 * @param data Monthly activity entries.
 * @returns CSV text with '\n' line separators and no trailing newline.
 */
export const convertToCSV = (data: MonthlyActivity[]): string => {
  // Group data by date
  const dataByDate = new Map<string, Map<string, number>>();
  const providers = new Set<string>();

  for (const { date, provider, count } of data) {
    let countsForDate = dataByDate.get(date);
    if (!countsForDate) {
      countsForDate = new Map<string, number>();
      dataByDate.set(date, countsForDate);
    }
    countsForDate.set(provider, count);
    providers.add(provider);
  }

  const providerColumns = Array.from(providers);
  const header = ['Date', ...providerColumns];

  const rows = Array.from(dataByDate.entries())
    .sort(([a], [b]) => a.localeCompare(b))
    .map(([date, countsForDate]) => [
      date,
      ...providerColumns.map(provider => String(countsForDate.get(provider) || 0)),
    ]);

  // Combine header and rows
  return [header, ...rows]
    .map(row => row.map(escapeCsvField).join(','))
    .join('\n');
};