-- Fix provider_name based on repository URLs across ALL tables -- This script updates the provider_name field to match the actual provider from the repository URL -- ============================================= -- 1. Fix all_repositories table -- ============================================= UPDATE all_repositories SET provider_name = 'github' WHERE repository_url LIKE '%github.com%' OR repository_url LIKE '%github.io%'; UPDATE all_repositories SET provider_name = 'gitlab' WHERE repository_url LIKE '%gitlab.com%' OR repository_url LIKE '%gitlab.io%'; UPDATE all_repositories SET provider_name = 'bitbucket' WHERE repository_url LIKE '%bitbucket.org%' OR repository_url LIKE '%bitbucket.io%'; UPDATE all_repositories SET provider_name = 'gitea' WHERE repository_url LIKE '%gitea.com%' OR repository_url LIKE '%gitea.io%'; -- ============================================= -- 2. Fix repository_storage table (linked to all_repositories) -- ============================================= UPDATE repository_storage SET provider_name = ar.provider_name FROM all_repositories ar WHERE repository_storage.repository_id = ar.id; -- ============================================= -- 3. Fix repository_commit_details table (linked to all_repositories) -- ============================================= UPDATE repository_commit_details SET provider_name = ar.provider_name FROM all_repositories ar WHERE repository_commit_details.repository_id = ar.id; -- ============================================= -- 4. Fix repository_commit_files table (linked to all_repositories) -- ============================================= UPDATE repository_commit_files SET provider_name = ar.provider_name FROM all_repositories ar WHERE repository_commit_files.repository_id = ar.id; -- ============================================= -- 5. Fix repository_directories table (linked to all_repositories) -- ============================================= UPDATE repository_directories SET provider_name = ar.provider_name FROM all_repositories ar WHERE repository_directories.repository_id = ar.id; -- ============================================= -- 6. Fix repository_files table (linked to all_repositories) -- ============================================= UPDATE repository_files SET provider_name = ar.provider_name FROM all_repositories ar WHERE repository_files.repository_id = ar.id; -- ============================================= -- 7. Show results for verification -- ============================================= -- Show all_repositories results SELECT 'all_repositories' as table_name, repository_url, repository_name, owner_name, provider_name, CASE WHEN repository_url LIKE '%github.com%' OR repository_url LIKE '%github.io%' THEN 'github' WHEN repository_url LIKE '%gitlab.com%' OR repository_url LIKE '%gitlab.io%' THEN 'gitlab' WHEN repository_url LIKE '%bitbucket.org%' OR repository_url LIKE '%bitbucket.io%' THEN 'bitbucket' WHEN repository_url LIKE '%gitea.com%' OR repository_url LIKE '%gitea.io%' THEN 'gitea' ELSE 'unknown' END as detected_provider FROM all_repositories ORDER BY provider_name, repository_name; -- Show summary counts by provider SELECT 'Summary by Provider' as info, provider_name, COUNT(*) as count FROM all_repositories GROUP BY provider_name ORDER BY provider_name;