Files
collect-japan-news/main.py
kihong.kim 56a6de61ce Initial commit: Japan Senior News Collector
- FastAPI backend with news scraping from Yahoo Japan
- SQLite database for article storage
- Web UI with dark mode, article modal, statistics dashboard
- Docker support for containerized deployment
- API endpoints: /api/today, /api/news, /api/collect-news, /api/dates, /api/download-json
- Auto-collect feature when requesting today's news
- Content filtering for articles without body text
2025-12-15 15:55:37 +09:00

127 lines
3.8 KiB
Python

from fastapi import FastAPI, HTTPException
from fastapi.responses import FileResponse
from fastapi.staticfiles import StaticFiles
from pydantic import BaseModel
from typing import List, Dict
import database
from scraper import NewsScraper
from datetime import datetime
import os
# ASGI application object; the route decorators below register handlers on it.
app = FastAPI()
# Initialize DB at import time (what gets set up is defined in the database module).
database.init_db()
# Serve static files. The directory is created first so the mount below always
# has an existing path to point at (exist_ok=True makes this a no-op when present).
os.makedirs("static", exist_ok=True)
app.mount("/static", StaticFiles(directory="static"), name="static")
@app.get("/")
async def read_root():
    """Serve the web UI entry page from the static directory."""
    index_page = "static/index.html"
    return FileResponse(index_page)
@app.post("/api/collect-news")
async def collect_news():
    """Scrape each news category and hand every scraped article to the database.

    Up to five articles are requested per category. The reported counts are
    the number of articles returned by the scraper for each category.

    Returns:
        A dict with "status", the overall "collected_count", and the
        per-category counts under "details".
    """
    # NOTE(review): the scraper/database calls look synchronous; inside an
    # async handler they would hold the event loop while running — confirm.
    news_scraper = NewsScraper()
    per_category = {}
    for name in ("Economy", "Society", "Lifestyle", "Health"):
        scraped = news_scraper.scrape_category(name, limit=5)
        for item in scraped:
            database.save_article(item)
        per_category[name] = len(scraped)
    return {
        "status": "success",
        "collected_count": sum(per_category.values()),
        "details": per_category,
    }
@app.get("/api/dates")
async def get_dates():
dates = database.get_available_dates()
return {"dates": dates}
from fastapi.responses import FileResponse, Response
import json
@app.get("/api/download-json")
async def download_json(date: str = None):
    """Return stored articles as a downloadable JSON attachment.

    Up to five articles per category are fetched with the same query that
    /api/news uses, and serialized as pretty-printed, non-ASCII-escaped JSON.

    Args:
        date: Optional collection date in canonical ``YYYY-MM-DD`` form.
            When omitted (or empty) the value is passed through to the
            database query unchanged and today's date is used only for
            the download file name.

    Returns:
        A JSON ``Response`` carrying a ``Content-Disposition: attachment``
        header with the file name ``japan-news-<date>.json``.

    Raises:
        HTTPException: 400 when ``date`` is given but is not a canonical
            ``YYYY-MM-DD`` calendar date.
    """
    if date:
        # The value ends up inside a response header, so accept only the
        # exact YYYY-MM-DD form. Comparing against the re-rendered ISO date
        # rejects non-padded, whitespace-padded and non-ASCII-digit variants
        # that strptime alone would tolerate.
        try:
            canonical = datetime.strptime(date, "%Y-%m-%d").date().isoformat()
        except ValueError:
            canonical = None
        if canonical != date:
            raise HTTPException(
                status_code=400,
                detail="date must be in YYYY-MM-DD format",
            )

    categories = ["Economy", "Society", "Lifestyle", "Health"]
    data = {
        cat: database.get_articles(category=cat, collection_date=date, limit=5)
        for cat in categories
    }

    # Without an explicit date the file is labelled with today's date, even
    # though the query itself is not restricted to today in that case.
    file_date = date if date else datetime.now().strftime("%Y-%m-%d")
    filename = f"japan-news-{file_date}.json"
    json_content = json.dumps(data, indent=2, ensure_ascii=False)
    return Response(
        content=json_content,
        media_type="application/json",
        headers={"Content-Disposition": f"attachment; filename={filename}"},
    )
@app.get("/api/news")
async def get_news(date: str = None):
    """Return up to five stored articles per category, keyed by category name.

    ``date`` is forwarded unchanged as the ``collection_date`` filter of the
    database query.
    """
    return {
        section: database.get_articles(
            category=section, collection_date=date, limit=5
        )
        for section in ("Economy", "Society", "Lifestyle", "Health")
    }
@app.get("/api/today")
async def get_today_news():
    """Return today's articles, scraping first when none are stored yet.

    The response holds the date string under "date", a per-category mapping
    under "articles", and the overall number of returned articles under
    "total_count".
    """
    sections = ("Economy", "Society", "Lifestyle", "Health")
    stamp = datetime.now().strftime("%Y-%m-%d")

    # Probe category by category and stop at the first one that already has
    # an article stored for today.
    already_collected = any(
        database.get_articles(category=section, collection_date=stamp, limit=1)
        for section in sections
    )

    if not already_collected:
        news_scraper = NewsScraper()
        for section in sections:
            for item in news_scraper.scrape_category(section, limit=5):
                database.save_article(item)

    # Read back from the database so the response reflects what is stored.
    by_section = {
        section: database.get_articles(
            category=section, collection_date=stamp, limit=5
        )
        for section in sections
    }
    return {
        "date": stamp,
        "articles": by_section,
        "total_count": sum(len(found) for found in by_section.values()),
    }