Initial commit: Japan Senior News Collector
- FastAPI backend with news scraping from Yahoo Japan - SQLite database for article storage - Web UI with dark mode, article modal, statistics dashboard - Docker support for containerized deployment - API endpoints: /api/today, /api/news, /api/collect-news, /api/dates, /api/download-json - Auto-collect feature when requesting today's news - Content filtering for articles without body text
This commit is contained in:
126
main.py
Normal file
126
main.py
Normal file
@@ -0,0 +1,126 @@
|
||||
# Standard library
import json
import os
from datetime import datetime
from typing import Dict, List

# Third-party
from fastapi import FastAPI, HTTPException
from fastapi.responses import FileResponse, Response
from fastapi.staticfiles import StaticFiles
from pydantic import BaseModel

# Local
import database
from scraper import NewsScraper
app = FastAPI()

# Create the SQLite schema (no-op when it already exists) before any
# request handler touches the database.
database.init_db()

# The UI is served from ./static; create it if missing so StaticFiles
# does not raise on startup, then mount it for asset delivery.
os.makedirs("static", exist_ok=True)
app.mount("/static", StaticFiles(directory="static"), name="static")
@app.get("/")
async def read_root():
    """Serve the single-page UI shell from the static directory."""
    index_page = 'static/index.html'
    return FileResponse(index_page)
@app.post("/api/collect-news")
async def collect_news():
    """Scrape a fresh batch of articles for every category and persist them.

    Scrapes up to five articles per category and saves each one to the
    database.

    Returns:
        A summary dict with a status flag, the overall article count, and
        a per-category breakdown of how many articles were collected.
    """
    scraper = NewsScraper()
    per_category = {}

    for category in ["Economy", "Society", "Lifestyle", "Health"]:
        batch = scraper.scrape_category(category, limit=5)
        for item in batch:
            database.save_article(item)
        per_category[category] = len(batch)

    return {
        "status": "success",
        "collected_count": sum(per_category.values()),
        "details": per_category,
    }
@app.get("/api/dates")
async def get_dates():
    """List every collection date for which articles are stored."""
    return {"dates": database.get_available_dates()}
@app.get("/api/download-json")
async def download_json(date: str = None):
    """Build a JSON export of stored articles and return it as a download.

    Args:
        date: Collection date ("YYYY-MM-DD") to export, or None for the
            latest stored articles per category.

    Returns:
        A Response carrying the JSON payload with a Content-Disposition
        header so browsers save it as ``japan-news-<date>.json``.
    """
    categories = ["Economy", "Society", "Lifestyle", "Health"]

    # get_articles already returns JSON-serializable dicts (not raw rows).
    data = {
        cat: database.get_articles(category=cat, collection_date=date, limit=5)
        for cat in categories
    }

    # NOTE(review): when date is None the rows may come from an earlier
    # collection date while the file name falls back to today's date —
    # acceptable for now, but the mismatch is worth confirming.
    file_date = date if date else datetime.now().strftime("%Y-%m-%d")
    filename = f"japan-news-{file_date}.json"

    json_content = json.dumps(data, indent=2, ensure_ascii=False)

    return Response(
        content=json_content,
        media_type="application/json",
        # Bug fix: the computed filename was never interpolated into the
        # header, so every download arrived with a broken literal name.
        headers={"Content-Disposition": f"attachment; filename={filename}"},
    )
@app.get("/api/news")
async def get_news(date: str = None):
    """Return up to five stored articles per category, keyed by category.

    Args:
        date: Optional collection date filter ("YYYY-MM-DD"); None returns
            the latest stored articles regardless of date.
    """
    # Shape the payload the way the frontend expects: {category: [articles]}.
    return {
        category: database.get_articles(
            category=category, collection_date=date, limit=5
        )
        for category in ["Economy", "Society", "Lifestyle", "Health"]
    }
def _collect_all_categories(categories):
    """Scrape up to five fresh articles per category and persist each one."""
    scraper = NewsScraper()
    for cat in categories:
        for article in scraper.scrape_category(cat, limit=5):
            database.save_article(article)


@app.get("/api/today")
async def get_today_news():
    """
    Get today's news. If no articles exist for today, collect them first.

    Returns:
        JSON with the collection date, up to five articles per category,
        and the total article count across all categories.
    """
    today = datetime.now().strftime("%Y-%m-%d")
    categories = ["Economy", "Society", "Lifestyle", "Health"]

    # Auto-collect: scrape only when no category has anything stored for
    # today (limit=1 keeps the existence probe cheap).
    if not any(
        database.get_articles(category=cat, collection_date=today, limit=1)
        for cat in categories
    ):
        _collect_all_categories(categories)

    # Re-read from the DB so the response reflects exactly what was
    # persisted rather than what the scraper returned.
    articles_by_category = {
        cat: database.get_articles(category=cat, collection_date=today, limit=5)
        for cat in categories
    }

    return {
        "date": today,
        "articles": articles_by_category,
        "total_count": sum(len(v) for v in articles_by_category.values()),
    }
Reference in New Issue
Block a user