merge_keywords.py 2.1 KB
Edit Raw Blame History

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Merge all keyword files and aggregate search volumes
"""

import os
from collections import defaultdict

def merge_keyword_files(output_dir='output'):
    """Merge all country keyword files and aggregate search volumes.

    Reads every ``*_keywords.txt`` file in *output_dir* (one
    ``keyword<TAB>volume`` pair per line), sums the volumes per keyword
    across all files, and writes the totals to ``all_keywords.txt`` in the
    same directory, sorted by volume in descending order. Progress and a
    summary (including the top 20 keywords) are printed to stdout.

    Blank lines and lines without a tab separator are ignored. Lines whose
    volume field is not an integer are skipped and counted in a per-file
    notice instead of aborting the whole merge.

    Args:
        output_dir: Directory that holds the per-country keyword files and
            receives the merged output. Defaults to ``'output'``.

    Raises:
        OSError: If *output_dir* cannot be listed, or a keyword file or the
            merged output file cannot be opened.
    """
    merged_name = 'all_keywords.txt'
    keyword_volumes = defaultdict(int)

    # The merged output also ends with '_keywords.txt'. Exclude it, otherwise
    # a second run would add the previous totals back into every keyword.
    keyword_files = [
        f for f in os.listdir(output_dir)
        if f.endswith('_keywords.txt') and f != merged_name
    ]

    print(f"Found {len(keyword_files)} keyword files")

    # Process files in name order so the run is deterministic.
    for filename in sorted(keyword_files):
        filepath = os.path.join(output_dir, filename)

        print(f"Processing {filename}...")

        line_count = 0
        skipped = 0
        with open(filepath, 'r', encoding='utf-8') as f:
            for line in f:
                line = line.strip()
                if not line:
                    continue

                # Expected layout: keyword<TAB>volume[<TAB>extra columns...]
                parts = line.split('\t')
                if len(parts) < 2:
                    continue

                try:
                    volume = int(parts[1])
                except ValueError:
                    # e.g. a header row or a corrupted value; keep going.
                    skipped += 1
                    continue

                keyword_volumes[parts[0]] += volume
                line_count += 1

        print(f"  Loaded {line_count} keywords")
        if skipped:
            print(f"  Skipped {skipped} lines with a non-integer volume")

    # Sort by volume (descending); ties keep first-seen order.
    sorted_keywords = sorted(keyword_volumes.items(), key=lambda x: x[1], reverse=True)

    # Save to all_keywords.txt
    output_file = os.path.join(output_dir, merged_name)
    with open(output_file, 'w', encoding='utf-8') as f:
        for keyword, volume in sorted_keywords:
            f.write(f"{keyword}\t{volume}\n")

    print("\nSummary:")
    print(f"  Total unique keywords: {len(keyword_volumes)}")
    print(f"  Total search volume: {sum(keyword_volumes.values()):,}")
    print("\nTop 20 keywords:")
    for i, (keyword, volume) in enumerate(sorted_keywords[:20], 1):
        print(f"  {i:2d}. {keyword}\t{volume:,}")

    print(f"\nSaved to {output_file}")

# Script entry point: run the merge only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    print("Merging keyword files...")
    merge_keyword_files()
    print("\nDone!")