Initial commit: Japan Senior News Collector
- FastAPI backend with news scraping from Yahoo Japan - SQLite database for article storage - Web UI with dark mode, article modal, statistics dashboard - Docker support for containerized deployment - API endpoints: /api/today, /api/news, /api/collect-news, /api/dates, /api/download-json - Auto-collect feature when requesting today news - Content filtering for articles without body text
This commit is contained in:
103
database.py
Normal file
103
database.py
Normal file
@@ -0,0 +1,103 @@
|
||||
import sqlite3
|
||||
from datetime import datetime, date
|
||||
from typing import List, Optional
|
||||
from pydantic import BaseModel
|
||||
|
||||
DB_NAME = "news.db"
|
||||
|
||||
class Article(BaseModel):
|
||||
title: str
|
||||
url: str
|
||||
image_url: Optional[str] = None
|
||||
published_date: Optional[str] = None
|
||||
category: str
|
||||
source: str = "Yahoo Japan"
|
||||
collected_at: str = datetime.now().isoformat()
|
||||
content: Optional[str] = None
|
||||
|
||||
def init_db():
|
||||
conn = sqlite3.connect(DB_NAME)
|
||||
c = conn.cursor()
|
||||
c.execute('''
|
||||
CREATE TABLE IF NOT EXISTS articles (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
title TEXT NOT NULL,
|
||||
url TEXT UNIQUE NOT NULL,
|
||||
image_url TEXT,
|
||||
published_date TEXT,
|
||||
category TEXT,
|
||||
source TEXT,
|
||||
collected_at TEXT,
|
||||
content TEXT
|
||||
)
|
||||
''')
|
||||
conn.commit()
|
||||
conn.close()
|
||||
|
||||
def save_article(article: Article):
|
||||
conn = sqlite3.connect(DB_NAME)
|
||||
c = conn.cursor()
|
||||
try:
|
||||
# Check if content column exists (for migration)
|
||||
cursor = c.execute("PRAGMA table_info(articles)")
|
||||
columns = [info[1] for info in cursor.fetchall()]
|
||||
if 'content' not in columns:
|
||||
c.execute("ALTER TABLE articles ADD COLUMN content TEXT")
|
||||
conn.commit()
|
||||
|
||||
c.execute('''
|
||||
INSERT INTO articles (title, url, image_url, published_date, category, source, collected_at, content)
|
||||
VALUES (?, ?, ?, ?, ?, ?, ?, ?)
|
||||
ON CONFLICT(url) DO UPDATE SET
|
||||
content = excluded.content,
|
||||
image_url = excluded.image_url,
|
||||
published_date = excluded.published_date
|
||||
''', (article.title, article.url, article.image_url, article.published_date, article.category, article.source, article.collected_at, article.content))
|
||||
conn.commit()
|
||||
except Exception as e:
|
||||
print(f"Error saving article: {e}")
|
||||
finally:
|
||||
conn.close()
|
||||
|
||||
def get_articles(category: Optional[str] = None, collection_date: Optional[str] = None, limit: int = 5) -> List[dict]:
|
||||
conn = sqlite3.connect(DB_NAME)
|
||||
conn.row_factory = sqlite3.Row
|
||||
c = conn.cursor()
|
||||
|
||||
# Filter out articles without content
|
||||
query = "SELECT * FROM articles WHERE content IS NOT NULL AND content != '' AND content != 'Content not found.'"
|
||||
params = []
|
||||
|
||||
if category:
|
||||
query += " AND category = ?"
|
||||
params.append(category)
|
||||
|
||||
if collection_date:
|
||||
# Assuming collection_date is 'YYYY-MM-DD' and collected_at is ISO format
|
||||
query += " AND date(collected_at) = ?"
|
||||
params.append(collection_date)
|
||||
else:
|
||||
# Default to today if no date specified? Or just get latest?
|
||||
# User said "collect news based on today".
|
||||
# But for viewing, maybe we just want the latest batch.
|
||||
# Let's order by collected_at desc
|
||||
pass
|
||||
|
||||
query += " ORDER BY collected_at DESC LIMIT ?"
|
||||
params.append(limit)
|
||||
|
||||
c.execute(query, tuple(params))
|
||||
rows = c.fetchall()
|
||||
conn.close()
|
||||
|
||||
|
||||
return [dict(row) for row in rows]
|
||||
|
||||
def get_available_dates() -> List[str]:
|
||||
conn = sqlite3.connect(DB_NAME)
|
||||
c = conn.cursor()
|
||||
# Extract distinct dates YYYY-MM-DD
|
||||
c.execute("SELECT DISTINCT date(collected_at) as date_val FROM articles ORDER BY date_val DESC")
|
||||
rows = c.fetchall()
|
||||
conn.close()
|
||||
return [row[0] for row in rows if row[0]]
|
||||
Reference in New Issue
Block a user