Initial commit: Japan Senior News Collector

- FastAPI backend with news scraping from Yahoo Japan
- SQLite database for article storage
- Web UI with dark mode, article modal, statistics dashboard
- Docker support for containerized deployment
- API endpoints: /api/today, /api/news, /api/collect-news, /api/dates, /api/download-json
- Auto-collect feature when requesting today's news
- Content filtering for articles without body text
This commit is contained in:
kihong.kim
2025-12-15 15:55:37 +09:00
commit 56a6de61ce
11 changed files with 1358 additions and 0 deletions

103
database.py Normal file
View File

@@ -0,0 +1,103 @@
import sqlite3
from datetime import datetime, date
from typing import List, Optional

from pydantic import BaseModel, Field
# SQLite database file opened by the functions in this module. The path is
# relative, so it resolves against the process's current working directory.
DB_NAME = "news.db"
class Article(BaseModel):
    """A single collected news article.

    The fields mirror the columns of the ``articles`` table (minus the
    auto-generated ``id``). ``title``, ``url`` and ``category`` are required;
    everything else has a default.
    """

    title: str
    url: str
    image_url: Optional[str] = None
    published_date: Optional[str] = None
    category: str
    source: str = "Yahoo Japan"
    # ISO-8601 timestamp of when the article was collected. A default_factory
    # is required here: a plain ``= datetime.now().isoformat()`` default is
    # evaluated once, when the class is defined, so every article created
    # without an explicit value would carry the import time of this module.
    collected_at: str = Field(default_factory=lambda: datetime.now().isoformat())
    # Article body text; None when no body was extracted.
    content: Optional[str] = None
def init_db():
    """Create the ``articles`` table if needed and bring older schemas up to date.

    Safe to call repeatedly: the table is only created when missing, and the
    ``content`` column is only added when an existing table lacks it.

    Raises:
        sqlite3.Error: if the database cannot be opened or modified.
    """
    conn = sqlite3.connect(DB_NAME)
    try:
        conn.execute(
            """
            CREATE TABLE IF NOT EXISTS articles (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                title TEXT NOT NULL,
                url TEXT UNIQUE NOT NULL,
                image_url TEXT,
                published_date TEXT,
                category TEXT,
                source TEXT,
                collected_at TEXT,
                content TEXT
            )
            """
        )
        # CREATE TABLE IF NOT EXISTS leaves a pre-existing table untouched, so
        # a database created before the ``content`` column was introduced
        # still needs it added; readers that filter on ``content`` would
        # otherwise fail with "no such column".
        columns = {info[1] for info in conn.execute("PRAGMA table_info(articles)")}
        if "content" not in columns:
            conn.execute("ALTER TABLE articles ADD COLUMN content TEXT")
        conn.commit()
    finally:
        # Release the connection even when one of the statements above raises.
        conn.close()
def save_article(article: Article):
    """Insert *article*, or refresh the stored row that has the same URL.

    When a row with the same ``url`` already exists, only ``content``,
    ``image_url`` and ``published_date`` are overwritten; the other stored
    columns keep their existing values.

    This is best-effort: any exception raised while saving is printed and
    swallowed, and the connection is always closed.
    """
    upsert_sql = '''
        INSERT INTO articles (title, url, image_url, published_date, category, source, collected_at, content)
        VALUES (?, ?, ?, ?, ?, ?, ?, ?)
        ON CONFLICT(url) DO UPDATE SET
            content = excluded.content,
            image_url = excluded.image_url,
            published_date = excluded.published_date
    '''
    db = sqlite3.connect(DB_NAME)
    cur = db.cursor()
    try:
        # Migration: tables created before the content column existed get it
        # added on the fly.
        known_columns = {col[1] for col in cur.execute("PRAGMA table_info(articles)")}
        if 'content' not in known_columns:
            cur.execute("ALTER TABLE articles ADD COLUMN content TEXT")
            db.commit()

        values = (
            article.title,
            article.url,
            article.image_url,
            article.published_date,
            article.category,
            article.source,
            article.collected_at,
            article.content,
        )
        cur.execute(upsert_sql, values)
        db.commit()
    except Exception as err:
        print(f"Error saving article: {err}")
    finally:
        db.close()
def get_articles(
    category: Optional[str] = None,
    collection_date: Optional[str] = None,
    limit: int = 5,
) -> List[dict]:
    """Return the most recently collected articles that have body text.

    Rows whose ``content`` is NULL, empty, or the placeholder
    ``'Content not found.'`` are always excluded.

    Args:
        category: Only return articles in this category. A falsy value
            (``None`` or ``""``) disables the filter.
        collection_date: Only return articles whose ``collected_at`` falls on
            this day, given as ``'YYYY-MM-DD'``. A falsy value disables the
            filter, in which case the latest articles of any day are returned.
        limit: Maximum number of rows, passed straight to SQL ``LIMIT``.

    Returns:
        One dict per row, keyed by column name, newest ``collected_at`` first.

    Raises:
        sqlite3.Error: if the database cannot be opened or queried.
    """
    query = (
        "SELECT * FROM articles"
        " WHERE content IS NOT NULL AND content != ''"
        " AND content != 'Content not found.'"
    )
    params: list = []
    if category:
        query += " AND category = ?"
        params.append(category)
    if collection_date:
        # collected_at holds an ISO timestamp; date() reduces it to YYYY-MM-DD.
        query += " AND date(collected_at) = ?"
        params.append(collection_date)
    query += " ORDER BY collected_at DESC LIMIT ?"
    params.append(limit)

    conn = sqlite3.connect(DB_NAME)
    try:
        conn.row_factory = sqlite3.Row
        rows = conn.execute(query, tuple(params)).fetchall()
    finally:
        # Close the connection even if the query raises (e.g. missing table).
        conn.close()
    return [dict(row) for row in rows]
def get_available_dates() -> List[str]:
    """Return the distinct collection days present in the database.

    Returns:
        ``'YYYY-MM-DD'`` strings, most recent first. Rows whose
        ``collected_at`` is NULL or cannot be parsed by SQLite's ``date()``
        are skipped.

    Raises:
        sqlite3.Error: if the database cannot be opened or queried.
    """
    # NOTE(review): unlike get_articles, this query does not filter on
    # ``content``, so a day may be listed even if all of its articles are
    # hidden there — confirm whether that is intended.
    conn = sqlite3.connect(DB_NAME)
    try:
        rows = conn.execute(
            "SELECT DISTINCT date(collected_at) as date_val FROM articles ORDER BY date_val DESC"
        ).fetchall()
    finally:
        # Close the connection even if the query raises (e.g. missing table).
        conn.close()
    # date() yields NULL for missing or malformed timestamps; drop those.
    return [row[0] for row in rows if row[0]]