- FastAPI backend with news scraping from Yahoo Japan - SQLite database for article storage - Web UI with dark mode, article modal, statistics dashboard - Docker support for containerized deployment - API endpoints: /api/today, /api/news, /api/collect-news, /api/dates, /api/download-json - Auto-collect feature when requesting today's news - Content filtering for articles without body text
127 lines
3.8 KiB
Python
import json
import os
from datetime import datetime
from typing import Dict, List

from fastapi import FastAPI, HTTPException
from fastapi.responses import FileResponse, Response
from fastapi.staticfiles import StaticFiles
from pydantic import BaseModel

import database
from scraper import NewsScraper
app = FastAPI()

# Create the SQLite schema before any request is served.
database.init_db()

# Guarantee the static asset directory exists, then expose it at /static.
os.makedirs("static", exist_ok=True)
app.mount("/static", StaticFiles(directory="static"), name="static")
|
@app.get("/")
async def read_root():
    """Serve the single-page web UI."""
    index_page = 'static/index.html'
    return FileResponse(index_page)
|
@app.post("/api/collect-news")
async def collect_news():
    """Scrape up to five fresh articles per category and persist them.

    Returns a summary containing the total number of articles collected
    plus a per-category breakdown.
    """
    scraper = NewsScraper()
    per_category = {}

    for category in ["Economy", "Society", "Lifestyle", "Health"]:
        fetched = scraper.scrape_category(category, limit=5)
        for item in fetched:
            database.save_article(item)
        per_category[category] = len(fetched)

    return {
        "status": "success",
        "collected_count": sum(per_category.values()),
        "details": per_category,
    }
|
@app.get("/api/dates")
async def get_dates():
    """List every collection date that has stored articles."""
    return {"dates": database.get_available_dates()}
|
@app.get("/api/download-json")
async def download_json(date: str = None):
    """Export stored articles as a downloadable JSON attachment.

    Args:
        date: Optional collection date (``YYYY-MM-DD``). When omitted, the
            latest stored articles are exported (get_articles treats a None
            date as "latest") and the file is named after today's date.

    Returns:
        A ``Response`` whose body is pretty-printed JSON mapping each
        category to up to 5 article dicts, served as an attachment.
    """
    categories = ["Economy", "Society", "Lifestyle", "Health"]
    data = {}
    for cat in categories:
        # get_articles already returns plain dicts, so the payload is
        # JSON-serializable as-is.
        data[cat] = database.get_articles(category=cat, collection_date=date, limit=5)

    file_date = date if date else datetime.now().strftime("%Y-%m-%d")
    filename = f"japan-news-{file_date}.json"
    # ensure_ascii=False keeps Japanese text readable in the exported file.
    json_content = json.dumps(data, indent=2, ensure_ascii=False)

    # BUG FIX: the header previously contained a literal placeholder
    # instead of interpolating the computed filename, so downloads were
    # saved under a bogus name and the `filename` variable went unused.
    return Response(
        content=json_content,
        media_type="application/json",
        headers={"Content-Disposition": f"attachment; filename={filename}"}
    )
|
@app.get("/api/news")
async def get_news(date: str = None):
    """Return up to five stored articles per category, keyed by category.

    When *date* is None the most recently collected articles are returned.
    """
    return {
        cat: database.get_articles(category=cat, collection_date=date, limit=5)
        for cat in ["Economy", "Society", "Lifestyle", "Health"]
    }
|
@app.get("/api/today")
async def get_today_news():
    """Return today's news, collecting it first when nothing is stored yet.

    Response shape::

        {"date": "YYYY-MM-DD", "articles": {category: [...]}, "total_count": N}
    """
    categories = ["Economy", "Society", "Lifestyle", "Health"]
    today = datetime.now().strftime("%Y-%m-%d")

    # Auto-collect only when no category has a single article for today;
    # any() short-circuits on the first category that does.
    already_collected = any(
        database.get_articles(category=cat, collection_date=today, limit=1)
        for cat in categories
    )
    if not already_collected:
        scraper = NewsScraper()
        for cat in categories:
            for article in scraper.scrape_category(cat, limit=5):
                database.save_article(article)

    # Re-read from the database so the response reflects what was saved.
    articles_by_category = {
        cat: database.get_articles(category=cat, collection_date=today, limit=5)
        for cat in categories
    }
    return {
        "date": today,
        "articles": articles_by_category,
        "total_count": sum(len(items) for items in articles_by_category.values()),
    }