#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Merge all keyword files and aggregate search volumes."""
import os
from collections import defaultdict


def merge_keyword_files(output_dir='output'):
    """Merge per-country keyword files and aggregate search volumes.

    Reads every ``*_keywords.txt`` file in *output_dir* (tab-separated
    ``keyword<TAB>volume`` lines), sums the volumes of keywords that
    appear in more than one file, and writes the merged result — sorted
    by aggregated volume, descending — to ``all_keywords.txt`` in the
    same directory. Progress and a summary are printed to stdout.

    Args:
        output_dir: Directory containing the ``*_keywords.txt`` input
            files; also receives ``all_keywords.txt``. Defaults to
            ``'output'`` (the original hard-coded location).

    Raises:
        FileNotFoundError: If *output_dir* does not exist.
        ValueError: If a volume field is not a valid integer.
    """
    keyword_volumes = defaultdict(int)

    # Collect all per-country input files.
    keyword_files = [f for f in os.listdir(output_dir)
                     if f.endswith('_keywords.txt')]
    print(f"Found {len(keyword_files)} keyword files")

    # Process each file; sorted() makes the processing order deterministic.
    for filename in sorted(keyword_files):
        filepath = os.path.join(output_dir, filename)
        country = filename.replace('_keywords.txt', '')
        # BUG FIX: the original printed the literal placeholder
        # "(unknown)" and never used the computed country name.
        print(f"Processing {country}...")

        line_count = 0
        with open(filepath, 'r', encoding='utf-8') as f:
            for line in f:
                line = line.strip()
                if not line:
                    continue  # skip blank lines
                # Tab-separated: keyword, volume[, extra fields ignored].
                parts = line.split('\t')
                if len(parts) >= 2:
                    keyword_volumes[parts[0]] += int(parts[1])
                    line_count += 1
        print(f" Loaded {line_count} keywords")

    # Sort by aggregated volume, highest first.
    sorted_keywords = sorted(keyword_volumes.items(),
                             key=lambda kv: kv[1], reverse=True)

    # Save the merged, sorted list.
    output_file = os.path.join(output_dir, 'all_keywords.txt')
    with open(output_file, 'w', encoding='utf-8') as f:
        for keyword, volume in sorted_keywords:
            f.write(f"{keyword}\t{volume}\n")

    print(f"\nSummary:")
    print(f" Total unique keywords: {len(keyword_volumes)}")
    print(f" Total search volume: {sum(keyword_volumes.values()):,}")
    print(f"\nTop 20 keywords:")
    for i, (keyword, volume) in enumerate(sorted_keywords[:20], 1):
        print(f" {i:2d}. {keyword}\t{volume:,}")

    print(f"\nSaved to {output_file}")


if __name__ == "__main__":
    print("Merging keyword files...")
    merge_keyword_files()
    print("\nDone!")