Initial commit: Japan Senior News Collector
- FastAPI backend with news scraping from Yahoo Japan - SQLite database for article storage - Web UI with dark mode, article modal, statistics dashboard - Docker support for containerized deployment - API endpoints: /api/today, /api/news, /api/collect-news, /api/dates, /api/download-json - Auto-collect feature when requesting today's news - Content filtering for articles without body text
This commit is contained in:
126
main.py
Normal file
126
main.py
Normal file
@@ -0,0 +1,126 @@
|
||||
# Standard library
import json
import os
from datetime import datetime
from typing import Dict, List

# Third-party
from fastapi import FastAPI, HTTPException
from fastapi.responses import FileResponse, Response
from fastapi.staticfiles import StaticFiles
from pydantic import BaseModel

# Local
import database
from scraper import NewsScraper
app = FastAPI()

# Create the SQLite schema (no-op when it already exists) before any
# request handler touches the database.
database.init_db()

# The UI is served from ./static; create it if missing so StaticFiles
# does not raise on startup, then mount it for asset delivery.
os.makedirs("static", exist_ok=True)
app.mount("/static", StaticFiles(directory="static"), name="static")
@app.get("/")
async def read_root():
    """Serve the single-page UI shell from the static directory."""
    index_page = 'static/index.html'
    return FileResponse(index_page)
@app.post("/api/collect-news")
async def collect_news():
    """Scrape a fresh batch of articles for every category and persist them.

    Scrapes up to five articles per category and saves each one to the
    database.

    Returns:
        A summary dict with a status flag, the overall article count, and
        a per-category breakdown of how many articles were collected.
    """
    scraper = NewsScraper()
    per_category = {}

    for category in ["Economy", "Society", "Lifestyle", "Health"]:
        batch = scraper.scrape_category(category, limit=5)
        for item in batch:
            database.save_article(item)
        per_category[category] = len(batch)

    return {
        "status": "success",
        "collected_count": sum(per_category.values()),
        "details": per_category,
    }
@app.get("/api/dates")
async def get_dates():
    """List every collection date for which articles are stored."""
    return {"dates": database.get_available_dates()}
@app.get("/api/download-json")
async def download_json(date: str = None):
    """Build a JSON export of stored articles and return it as a download.

    Args:
        date: Collection date ("YYYY-MM-DD") to export, or None for the
            latest stored articles per category.

    Returns:
        A Response carrying the JSON payload with a Content-Disposition
        header so browsers save it as ``japan-news-<date>.json``.
    """
    categories = ["Economy", "Society", "Lifestyle", "Health"]

    # get_articles already returns JSON-serializable dicts (not raw rows).
    data = {
        cat: database.get_articles(category=cat, collection_date=date, limit=5)
        for cat in categories
    }

    # NOTE(review): when date is None the rows may come from an earlier
    # collection date while the file name falls back to today's date —
    # acceptable for now, but the mismatch is worth confirming.
    file_date = date if date else datetime.now().strftime("%Y-%m-%d")
    filename = f"japan-news-{file_date}.json"

    json_content = json.dumps(data, indent=2, ensure_ascii=False)

    return Response(
        content=json_content,
        media_type="application/json",
        # Bug fix: the computed filename was never interpolated into the
        # header, so every download arrived with a broken literal name.
        headers={"Content-Disposition": f"attachment; filename={filename}"},
    )
@app.get("/api/news")
async def get_news(date: str = None):
    """Return up to five stored articles per category, keyed by category.

    Args:
        date: Optional collection date filter ("YYYY-MM-DD"); None returns
            the latest stored articles regardless of date.
    """
    # Shape the payload the way the frontend expects: {category: [articles]}.
    return {
        category: database.get_articles(
            category=category, collection_date=date, limit=5
        )
        for category in ["Economy", "Society", "Lifestyle", "Health"]
    }
def _collect_all_categories(categories):
    """Scrape up to five fresh articles per category and persist each one."""
    scraper = NewsScraper()
    for cat in categories:
        for article in scraper.scrape_category(cat, limit=5):
            database.save_article(article)


@app.get("/api/today")
async def get_today_news():
    """
    Get today's news. If no articles exist for today, collect them first.

    Returns:
        JSON with the collection date, up to five articles per category,
        and the total article count across all categories.
    """
    today = datetime.now().strftime("%Y-%m-%d")
    categories = ["Economy", "Society", "Lifestyle", "Health"]

    # Auto-collect: scrape only when no category has anything stored for
    # today (limit=1 keeps the existence probe cheap).
    if not any(
        database.get_articles(category=cat, collection_date=today, limit=1)
        for cat in categories
    ):
        _collect_all_categories(categories)

    # Re-read from the DB so the response reflects exactly what was
    # persisted rather than what the scraper returned.
    articles_by_category = {
        cat: database.get_articles(category=cat, collection_date=today, limit=5)
        for cat in categories
    }

    return {
        "date": today,
        "articles": articles_by_category,
        "total_count": sum(len(v) for v in articles_by_category.values()),
    }
Reference in New Issue
Block a user