- FastAPI backend with news scraping from Yahoo Japan - SQLite database for article storage - Web UI with dark mode, article modal, statistics dashboard - Docker support for containerized deployment - API endpoints: /api/today, /api/news, /api/collect-news, /api/dates, /api/download-json - Auto-collect feature when requesting today's news - Content filtering for articles without body text
127 lines
3.8 KiB
Python
import json
import os
from datetime import datetime
from typing import Dict, List

from fastapi import FastAPI, HTTPException
from fastapi.responses import FileResponse, Response
from fastapi.staticfiles import StaticFiles
from pydantic import BaseModel

import database
from scraper import NewsScraper
app = FastAPI()

# Create the SQLite schema before any request is served.
database.init_db()

# Guarantee the static asset directory exists, then expose it at /static.
os.makedirs("static", exist_ok=True)
app.mount("/static", StaticFiles(directory="static"), name="static")
|
@app.get("/")
async def read_root():
    """Serve the single-page web UI."""
    index_page = 'static/index.html'
    return FileResponse(index_page)
|
@app.post("/api/collect-news")
async def collect_news():
    """Scrape up to five fresh articles per category and persist them.

    Returns a summary containing the total number of articles collected
    plus a per-category breakdown.
    """
    scraper = NewsScraper()
    per_category = {}

    for category in ["Economy", "Society", "Lifestyle", "Health"]:
        fetched = scraper.scrape_category(category, limit=5)
        for item in fetched:
            database.save_article(item)
        per_category[category] = len(fetched)

    return {
        "status": "success",
        "collected_count": sum(per_category.values()),
        "details": per_category,
    }
|
@app.get("/api/dates")
async def get_dates():
    """List every collection date that has stored articles."""
    return {"dates": database.get_available_dates()}
|
@app.get("/api/download-json")
async def download_json(date: str = None):
    """Export stored articles as a downloadable JSON attachment.

    Args:
        date: Optional collection date (``YYYY-MM-DD``). When omitted, the
            latest stored articles are exported (get_articles treats a None
            date as "latest") and the file is named after today's date.

    Returns:
        A ``Response`` whose body is pretty-printed JSON mapping each
        category to up to 5 article dicts, served as an attachment.
    """
    categories = ["Economy", "Society", "Lifestyle", "Health"]
    data = {}
    for cat in categories:
        # get_articles already returns plain dicts, so the payload is
        # JSON-serializable as-is.
        data[cat] = database.get_articles(category=cat, collection_date=date, limit=5)

    file_date = date if date else datetime.now().strftime("%Y-%m-%d")
    filename = f"japan-news-{file_date}.json"
    # ensure_ascii=False keeps Japanese text readable in the exported file.
    json_content = json.dumps(data, indent=2, ensure_ascii=False)

    # BUG FIX: the header previously contained a literal placeholder
    # instead of interpolating the computed filename, so downloads were
    # saved under a bogus name and the `filename` variable went unused.
    return Response(
        content=json_content,
        media_type="application/json",
        headers={"Content-Disposition": f"attachment; filename={filename}"}
    )
|
@app.get("/api/news")
async def get_news(date: str = None):
    """Return up to five stored articles per category, keyed by category.

    When *date* is None the most recently collected articles are returned.
    """
    return {
        cat: database.get_articles(category=cat, collection_date=date, limit=5)
        for cat in ["Economy", "Society", "Lifestyle", "Health"]
    }
|
@app.get("/api/today")
async def get_today_news():
    """Return today's news, collecting it first when nothing is stored yet.

    Response shape::

        {"date": "YYYY-MM-DD", "articles": {category: [...]}, "total_count": N}
    """
    categories = ["Economy", "Society", "Lifestyle", "Health"]
    today = datetime.now().strftime("%Y-%m-%d")

    # Auto-collect only when no category has a single article for today;
    # any() short-circuits on the first category that does.
    already_collected = any(
        database.get_articles(category=cat, collection_date=today, limit=1)
        for cat in categories
    )
    if not already_collected:
        scraper = NewsScraper()
        for cat in categories:
            for article in scraper.scrape_category(cat, limit=5):
                database.save_article(article)

    # Re-read from the database so the response reflects what was saved.
    articles_by_category = {
        cat: database.get_articles(category=cat, collection_date=today, limit=5)
        for cat in categories
    }
    return {
        "date": today,
        "articles": articles_by_category,
        "total_count": sum(len(items) for items in articles_by_category.values()),
    }