# python/book_reader/小说阅读器.py
#
# 986 lines
# 42 KiB
# Python
# Raw Blame History
#
# This file contains ambiguous Unicode characters
#
# This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import tkinter as tk
from tkinter import ttk, messagebox, filedialog
import threading
import os
import sys
import requests
from bs4 import BeautifulSoup
import fitz
class NovelSpider:
    """HTTP scraper that reads chapter lists and chapter text from a novel site.

    A chapter is represented everywhere as a ``(title, url, book_url)`` tuple.
    Both public methods are best-effort: failures are printed and an empty (or
    partial) result is returned instead of raising.
    """

    # Root used to resolve hrefs that start with "/".
    BASE_URL = "https://www.92yanqing.com"

    # Containers that may hold the chapter links, tried in this order.
    _CHAPTER_LIST_SELECTORS = (
        ('div', {'class_': 'chapterlist'}),
        ('div', {'class_': 'listmain'}),
        ('div', {'id': 'list'}),
        ('ul', {'class_': 'chapterlist'}),
        ('div', {'class_': 'chapter'}),
    )

    # Boilerplate removed from chapter text, in this order.
    _NOISE_PHRASES = (
        '本章未完,点击下一页继续阅读',
        '本章完',
        '请记住本书首发域名www.92yanqing.com。',
        '92言情小说网',
        '最快更新无弹窗小说',
    )

    def __init__(self):
        """Create a pooled, retrying session with browser-like headers."""
        self.session = requests.Session()
        adapter = requests.adapters.HTTPAdapter(
            pool_connections=20,
            pool_maxsize=20,
            max_retries=2
        )
        self.session.mount('http://', adapter)
        self.session.mount('https://', adapter)
        # SECURITY NOTE(review): TLS certificate verification is disabled for
        # every request made through this session. Kept as-is because the
        # rest of the file silences the matching urllib3 warning, but confirm
        # this is really required.
        self.session.verify = False
        self.session.headers.update({
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8',
            'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8',
            'Referer': 'https://www.92yanqing.com/',
            'Connection': 'keep-alive'
        })
        # Seconds; applied to every request issued by this class.
        self.timeout = 8

    @staticmethod
    def _absolute_url(href, book_url):
        """Resolve *href* the way the site's links are laid out.

        Absolute URLs are returned unchanged, root-relative ones are prefixed
        with ``BASE_URL`` and anything else is appended to *book_url*.
        """
        if href.startswith('http'):
            return href
        if href.startswith('/'):
            return NovelSpider.BASE_URL + href
        return book_url.rstrip('/') + '/' + href

    def get_chapters(self, book_url):
        """Return the chapters of the book at *book_url*.

        Returns:
            A list of ``(title, url, book_url)`` tuples sorted by title, or
            ``[]`` when the book page cannot be fetched or parsed.
        """
        try:
            response = self.session.get(book_url, timeout=self.timeout)
            response.encoding = "utf-8"
            soup = BeautifulSoup(response.text, 'html.parser')
            chapters = []

            chapter_list = None
            for tag, attrs in self._CHAPTER_LIST_SELECTORS:
                chapter_list = soup.find(tag, **attrs)
                if chapter_list:
                    break

            if chapter_list:
                for link in chapter_list.find_all('a', href=True):
                    href = link.get('href')
                    title = link.get_text(strip=True)
                    if href and title and '/read/' in href:
                        chapters.append((title, self._absolute_url(href, book_url), book_url))

            # Fallback: with few links found, read the chapter <select> on
            # the "start reading" page instead.
            if len(chapters) < 10:
                start_read_link = soup.find('a', string='开始阅读')
                start_url = start_read_link.get('href') if start_read_link else None
                # A link without an href is skipped so that the chapters
                # collected above are not lost to an AttributeError.
                if start_url:
                    start_url = self._absolute_url(start_url, book_url)
                    try:
                        response = self.session.get(start_url, timeout=self.timeout)
                        response.encoding = "utf-8"
                        soup = BeautifulSoup(response.text, 'html.parser')
                        chapter_select = soup.find('select')
                        if chapter_select:
                            for option in chapter_select.find_all('option'):
                                value = option.get('value')
                                title = option.get_text(strip=True)
                                if value and title and value != '#':
                                    chapters.append((title, self._absolute_url(value, book_url), book_url))
                    except Exception as e:
                        print(f"尝试从开始阅读页面获取章节失败: {e}")

            # NOTE(review): this is a plain string sort on the title, so
            # "第10章" sorts before "第2章" and page order is discarded; kept
            # for compatibility with existing callers — confirm it is wanted.
            chapters.sort(key=lambda x: x[0])
            print(f"获取到 {len(chapters)}")
            return chapters
        except Exception as e:
            print(f"获取章节失败: {e}")
            return []

    def get_content(self, chapter_info):
        """Download and clean the text of one chapter, following "next" links.

        Args:
            chapter_info: ``(title, url, book_url)`` tuple from ``get_chapters``.

        Returns:
            The cleaned text with pages separated by blank lines. On a
            request/parse error the text gathered so far is returned.
        """
        title, url, book_url = chapter_info
        content = ""
        has_chapter_end = False
        # Pages already fetched; stops the loop if a "next" link points back
        # to a page that was already read.
        visited = set()
        try:
            while url and url not in visited:
                visited.add(url)
                response = self.session.get(url, timeout=self.timeout)
                response.encoding = "utf-8"
                soup = BeautifulSoup(response.text, 'html.parser')
                content_div = soup.find('div', id='content') or soup.find('div', class_='content')
                if content_div:
                    for script in content_div(['script', 'style']):
                        script.decompose()
                    text = content_div.get_text(separator='\n', strip=True)
                    # Must be tested before the marker is stripped below.
                    has_chapter_end = '本章完' in text
                    for phrase in self._NOISE_PHRASES:
                        text = text.replace(phrase, '')
                    cleaned_lines = []
                    for line in text.split('\n'):
                        line = line.strip()
                        # Drop blank lines and a repeated chapter title.
                        if line and line != title:
                            cleaned_lines.append(line)
                    content += '\n'.join(cleaned_lines) + '\n\n'

                next_page = None
                next_link = soup.find('a', string='下一页') or soup.find('a', string='下一章')
                if next_link:
                    next_href = next_link.get('href')
                    if next_href:
                        next_page = self._absolute_url(next_href, book_url)
                if has_chapter_end or not next_page:
                    break
                url = next_page
            return content.strip()
        except Exception as e:
            print(f"获取内容失败: {e}")
            return content.strip()
class PDFViewer(tk.Frame):
    """Frame that renders one PDF page at a time next to a bookmark outline.

    The outline tree and the page canvas are placed in a horizontal paned
    window the first time ``load_outline`` runs.
    """

    def __init__(self, parent):
        """Build the outline tree, the scrollable page canvas and the status label."""
        super().__init__(parent)
        self.parent = parent
        self.pdf_document = None
        self.current_page = 0
        self.total_pages = 0
        # page index -> tk.PhotoImage; also keeps the images referenced.
        self.page_images = {}
        # (title, zero-based page index) for each bookmark loaded.
        self.outline = []

        self.paned_window = ttk.PanedWindow(self, orient=tk.HORIZONTAL)
        self.paned_window.pack(fill=tk.BOTH, expand=True)

        self.outline_frame = ttk.Frame(self, width=200, borderwidth=1, relief="solid")
        self.outline_frame.pack_propagate(False)
        # A column is declared so the page index passed as ``values`` has a
        # slot; show='tree' keeps it hidden.
        self.outline_tree = ttk.Treeview(self.outline_frame, columns=('page',), show='tree')
        self.outline_tree.pack(fill=tk.BOTH, expand=True)
        self.outline_tree.bind('<<TreeviewSelect>>', self.on_outline_select)
        self.outline_tree.insert("", tk.END, text="请打开PDF文件")

        self.content_frame = ttk.Frame(self)
        self.content_frame.pack_propagate(False)
        self.canvas = tk.Canvas(self.content_frame, bg='white')
        self.scrollbar_y = ttk.Scrollbar(self.content_frame, orient=tk.VERTICAL, command=self.canvas.yview)
        self.scrollbar_x = ttk.Scrollbar(self.content_frame, orient=tk.HORIZONTAL, command=self.canvas.xview)
        # Scrollbars are packed before the canvas: the packer hands out space
        # in packing order, so an expanding canvas packed first would leave
        # the scrollbars no usable room.
        self.scrollbar_y.pack(side=tk.RIGHT, fill=tk.Y)
        self.scrollbar_x.pack(side=tk.BOTTOM, fill=tk.X)
        self.canvas.pack(fill=tk.BOTH, expand=True)
        self.canvas.config(yscrollcommand=self.scrollbar_y.set)
        self.canvas.config(xscrollcommand=self.scrollbar_x.set)
        # <MouseWheel> carries event.delta; <Button-4>/<Button-5> carry
        # event.num (see on_mousewheel).
        self.canvas.bind('<MouseWheel>', self.on_mousewheel)
        self.canvas.bind('<Button-4>', self.on_mousewheel)
        self.canvas.bind('<Button-5>', self.on_mousewheel)

        self.status_label = ttk.Label(self, text="")
        self.status_label.pack(side=tk.BOTTOM, fill=tk.X)

    def load_pdf(self, file_path):
        """Open *file_path*, rebuild the outline and show the first page.

        Returns:
            True on success; False after showing an error dialog on failure.
        """
        try:
            print(f"开始加载PDF: {file_path}")
            new_document = fitz.open(file_path)
            previous_document = self.pdf_document
            self.pdf_document = new_document
            # Release the previously opened file now that the new one is open.
            if previous_document is not None:
                previous_document.close()
            self.total_pages = len(self.pdf_document)
            self.current_page = 0
            self.page_images = {}
            print(f"PDF加载成功{self.total_pages}")
            self.load_outline()
            # show_page also refreshes the status label.
            self.show_page(0)
            return True
        except Exception as e:
            print(f"加载PDF失败: {e}")
            messagebox.showerror("错误", f"加载PDF失败: {str(e)}")
            return False

    def load_outline(self):
        """Fill the outline tree from the document's table of contents."""
        for item in self.outline_tree.get_children():
            self.outline_tree.delete(item)
        self.outline = []
        try:
            toc = self.pdf_document.get_toc()
            print(f"获取到书签数量: {len(toc) if toc else 0}")
            if toc:
                # level -> tree item id of the latest entry seen at that
                # level; level 0 stands for the tree root.
                parent_map = {0: ""}
                count = 0
                for i, entry in enumerate(toc[:5]):
                    print(f"书签 {i}: {entry}")
                for entry in toc:
                    level, title, page_num = entry
                    if level not in parent_map:
                        parent_map[level] = parent_map.get(level - 1, "")
                    parent = parent_map.get(level - 1, "")
                    try:
                        # The page is stored zero-based (page_num - 1).
                        item_id = self.outline_tree.insert(parent, tk.END, text=title, values=(page_num - 1,))
                        parent_map[level] = item_id
                        self.outline.append((title, page_num - 1))
                        count += 1
                    except Exception as insert_e:
                        print(f"插入书签失败 '{title}': {insert_e}")
                print(f"已加载 {count} 个书签")
                print(f"Treeview子节点数量: {len(self.outline_tree.get_children())}")
            else:
                self.outline_tree.insert("", tk.END, text="该PDF没有书签")
                print("PDF没有书签")
        except Exception as e:
            self.outline_tree.insert("", tk.END, text="加载书签失败")
            print(f"加载书签失败: {e}")

        # Drop whatever panes exist and add both again.
        for pane in list(self.paned_window.panes()):
            self.paned_window.forget(pane)
        self.paned_window.add(self.outline_frame, weight=1)
        self.paned_window.add(self.content_frame, weight=3)

    def on_outline_select(self, event):
        """Show the page attached to the selected outline row."""
        selected = self.outline_tree.selection()
        if not selected:
            return
        values = self.outline_tree.item(selected[0], "values")
        # Placeholder rows are inserted without values; indexing them would
        # raise IndexError.
        if not values:
            return
        self.show_page(int(values[0]))

    def show_page(self, page_num):
        """Render zero-based *page_num*; out-of-range requests are ignored."""
        if not self.pdf_document or page_num < 0 or page_num >= self.total_pages:
            return
        self.current_page = page_num
        if page_num in self.page_images:
            img = self.page_images[page_num]
        else:
            page = self.pdf_document.load_page(page_num)
            pix = page.get_pixmap(matrix=fitz.Matrix(2, 2))
            img = tk.PhotoImage(data=pix.tobytes('ppm'))
            self.page_images[page_num] = img
        # Extra reference stored on the canvas for the displayed image.
        self.canvas.image = img
        self.canvas.delete('all')
        self.canvas.create_image(0, 0, anchor=tk.NW, image=img)
        self.canvas.config(scrollregion=self.canvas.bbox(tk.ALL))
        self.canvas.yview_moveto(0)
        self.canvas.xview_moveto(0)
        self.update_status()

    def update_status(self):
        """Show "current / total" in the status label, or clear it."""
        if self.pdf_document:
            self.status_label.config(text=f"{self.current_page + 1} / {self.total_pages}")
        else:
            self.status_label.config(text="")

    def on_mousewheel(self, event):
        """Scroll the canvas one unit up or down for a wheel event."""
        if event.num == 4 or event.delta > 0:
            self.canvas.yview_scroll(-1, 'units')
        else:
            self.canvas.yview_scroll(1, 'units')

    def next_page(self):
        """Advance one page unless already on the last one."""
        if self.current_page < self.total_pages - 1:
            self.show_page(self.current_page + 1)

    def prev_page(self):
        """Go back one page unless already on the first one."""
        if self.current_page > 0:
            self.show_page(self.current_page - 1)
class NovelReaderApp:
    """Main window: crawl a novel, read it as text, export and view it as PDF.

    Network and PDF work runs on daemon threads; widget updates from those
    threads are scheduled with ``root.after``.
    """

    def __init__(self, root):
        """Configure the window, create the spider and build the widgets."""
        self.root = root
        self.root.title("小说阅读器")
        self.root.geometry("1200x800")
        self.spider = NovelSpider()
        # List of (title, url, book_url) tuples for the current book.
        self.chapters = []
        self.current_chapter = 0
        # chapter index -> downloaded text.
        self.content_cache = {}
        self.novel_name = ""
        self.create_widgets()

    def create_widgets(self):
        """Create the toolbar, chapter list, text view, PDF view and status bar."""
        self.top_frame = ttk.Frame(self.root, padding="10")
        self.top_frame.pack(fill=tk.X, side=tk.TOP)
        ttk.Label(self.top_frame, text="网址:").pack(side=tk.LEFT, padx=5)
        self.url_entry = ttk.Entry(self.top_frame, width=50)
        self.url_entry.pack(side=tk.LEFT, padx=5)
        ttk.Label(self.top_frame, text="小说名称:").pack(side=tk.LEFT, padx=5)
        self.name_entry = ttk.Entry(self.top_frame, width=30)
        self.name_entry.pack(side=tk.LEFT, padx=5)
        self.crawl_btn = ttk.Button(self.top_frame, text="开始爬取", command=self.start_crawl)
        self.crawl_btn.pack(side=tk.LEFT, padx=5)
        self.search_btn = ttk.Button(self.top_frame, text="搜索小说", command=self.search_novel)
        self.search_btn.pack(side=tk.LEFT, padx=5)
        self.open_pdf_btn = ttk.Button(self.top_frame, text="打开PDF", command=self.open_pdf_file)
        self.open_pdf_btn.pack(side=tk.LEFT, padx=5)
        self.mode_var = tk.StringVar(value="text")
        ttk.Radiobutton(self.top_frame, text="文本阅读", variable=self.mode_var, value="text",
                        command=self.switch_mode).pack(side=tk.LEFT, padx=5)
        ttk.Radiobutton(self.top_frame, text="PDF阅读", variable=self.mode_var, value="pdf",
                        command=self.switch_mode).pack(side=tk.LEFT, padx=5)
        # Created here but only packed while a crawl is running.
        self.progress_bar = ttk.Progressbar(self.top_frame, mode='determinate', maximum=100, value=0)

        self.main_frame = ttk.Frame(self.root)
        self.main_frame.pack(fill=tk.BOTH, expand=True)
        self.left_frame = ttk.Frame(self.main_frame, width=250, borderwidth=1, relief="solid")
        self.left_frame.pack(fill=tk.Y, side=tk.LEFT)
        self.left_frame.pack_propagate(False)
        self.chapter_tree = ttk.Treeview(self.left_frame, columns=('index',), show='tree')
        self.chapter_tree.pack(fill=tk.BOTH, expand=True)
        self.chapter_tree.bind('<<TreeviewSelect>>', self.on_chapter_select)
        self.chapter_tree.insert("", tk.END, text="请输入小说网址并爬取")
        print("章节树已初始化,显示提示文本")
        self.text_frame = ttk.Frame(self.main_frame)
        self.text_frame.pack(fill=tk.BOTH, expand=True)
        self.content_text = tk.Text(self.text_frame, wrap=tk.WORD, font=('SimSun', 12))
        self.content_text.pack(fill=tk.BOTH, expand=True)
        # Not packed until PDF mode is selected (see switch_mode).
        self.pdf_frame = PDFViewer(self.main_frame)

        self.bottom_frame = ttk.Frame(self.root, padding="10")
        self.bottom_frame.pack(fill=tk.X, side=tk.BOTTOM)
        self.status_label = ttk.Label(self.bottom_frame, text="准备就绪")
        self.status_label.pack(side=tk.RIGHT)
        self.current_pdf_path = ""
        self.stop_crawl_flag = False

    def sort_chapters(self, chapters):
        """Return *chapters* ordered by the number found in each title.

        The first matching pattern wins: ``第N章``, ``N、``, then ``N ``.
        Titles without a number sort as 0 and keep their relative order.
        """
        import re
        number_patterns = (r'第(\d+)章', r'(\d+)、', r'(\d+) ')

        def extract_chapter_number(title):
            for pattern in number_patterns:
                match = re.search(pattern, title)
                if match:
                    return int(match.group(1))
            return 0

        return sorted(chapters, key=lambda chapter: extract_chapter_number(chapter[0]))

    @staticmethod
    def _safe_filename(name):
        """Replace characters that are invalid in file names with ``_``."""
        import re
        cleaned = re.sub(r'[\\/:*?"<>|\x00-\x1f]', '_', name).strip(' .')
        return cleaned or 'novel'

    def disable_inputs(self):
        """Disable the entries and buttons that must not change during a crawl."""
        for widget in (self.url_entry, self.name_entry, self.search_btn, self.open_pdf_btn):
            widget.config(state=tk.DISABLED)

    def enable_inputs(self):
        """Re-enable the widgets disabled by ``disable_inputs``."""
        for widget in (self.url_entry, self.name_entry, self.search_btn, self.open_pdf_btn):
            widget.config(state=tk.NORMAL)

    def start_crawl(self):
        """Start a crawl, or request a stop when one is running.

        The crawl button doubles as the stop button; its caption tells the
        two states apart.
        """
        if self.crawl_btn['text'] == '停止爬取':
            self.stop_crawl_flag = True
            self.crawl_btn.config(text='开始爬取')
            self.status_label.config(text="爬取已停止")
            self.enable_inputs()
            return
        book_url = self.url_entry.get().strip()
        self.novel_name = self.name_entry.get().strip()
        if not book_url:
            messagebox.showwarning("警告", "请输入网址")
            return
        if not self.novel_name:
            messagebox.showwarning("警告", "请输入小说名称")
            return
        self.stop_crawl_flag = False
        self.content_cache = {}
        self.chapters = []
        self.disable_inputs()
        self.crawl_btn.config(text='停止爬取')
        self.progress_bar.pack(side=tk.LEFT, padx=5, fill=tk.X, expand=True)
        self.start_crawl_thread()

    def search_novel(self):
        """Search the site for the name in the name entry and list the matches."""
        novel_name = self.name_entry.get().strip()
        if not novel_name:
            messagebox.showwarning("警告", "请输入小说名称")
            return
        self.status_label.config(text=f"正在搜索《{novel_name}》...")

        def search_thread():
            try:
                # ``params`` URL-encodes the key, so names containing '&',
                # '#' or spaces do not break the query string.
                response = self.spider.session.get(
                    "https://www.92yanqing.com/s/", params={"searchkey": novel_name}, timeout=10)
                response.encoding = "utf-8"
                soup = BeautifulSoup(response.text, 'html.parser')
                results = []
                book_items = soup.find_all('div', class_='bookitem') or soup.find_all('div', class_='search-item')
                if not book_items:
                    book_items = soup.find_all('a', href=True)
                for item in book_items:
                    # A result container has no href of its own; use the
                    # anchors inside it.
                    links = [item] if item.name == 'a' else item.find_all('a', href=True)
                    for link in links:
                        title = link.get_text(strip=True)
                        href = link.get('href', '')
                        if title and href and '/read/' in href:
                            if not href.startswith('http'):
                                href = "https://www.92yanqing.com/" + href.lstrip('/')
                            if novel_name in title:
                                results.append((title, href))
                if not results:
                    self.root.after(0, lambda: messagebox.showwarning("提示", f"未找到《{novel_name}》相关小说"))
                    return
                self.root.after(0, lambda: self.show_search_results(results))
            except Exception as e:
                print(f"搜索失败: {e}")
                # Built now: ``e`` is unbound once the except block ends, so
                # a lambda reading it later would raise NameError.
                message = f"搜索失败: {str(e)}"
                self.root.after(0, lambda: messagebox.showerror("错误", message))

        threading.Thread(target=search_thread, daemon=True).start()

    def show_search_results(self, results):
        """Open a window listing ``(title, url)`` results; a pick fills the entries."""
        search_window = tk.Toplevel(self.root)
        search_window.title("搜索结果")
        search_window.geometry("800x500")
        tree = ttk.Treeview(search_window, columns=('url',), show='tree')
        tree.pack(fill=tk.BOTH, expand=True)
        for title, url in results:
            tree.insert("", tk.END, text=title, values=(url,))

        def on_select(event):
            selected = tree.selection()
            if not selected:
                return
            item = selected[0]
            url = tree.item(item, "values")[0]
            title = tree.item(item, "text")
            self.url_entry.delete(0, tk.END)
            self.url_entry.insert(0, url)
            self.name_entry.delete(0, tk.END)
            self.name_entry.insert(0, title)
            search_window.destroy()

        tree.bind('<<TreeviewSelect>>', on_select)
        select_btn = ttk.Button(search_window, text="选择", command=lambda: on_select(None))
        select_btn.pack(pady=10)
        self.status_label.config(text="搜索完成")

    def start_crawl_thread(self):
        """Fetch and sort the chapter list in the background, then start the PDF export."""
        book_url = self.url_entry.get().strip()
        self.novel_name = self.name_entry.get().strip()

        def crawl_thread():
            # True once the PDF export has been scheduled; from then on the
            # export is responsible for calling crawl_complete.
            handed_off = False
            try:
                self.root.after(0, lambda: self.progress_bar.config(value=0))
                self.root.after(0, lambda: self.status_label.config(text="正在获取章节列表..."))
                print("开始获取章节列表")
                self.chapters = self.spider.get_chapters(book_url)
                print(f"获取章节完成,共 {len(self.chapters)}")
                if self.stop_crawl_flag:
                    return
                if not self.chapters:
                    self.root.after(0, lambda: messagebox.showwarning("警告", "未能获取章节列表"))
                    return
                self.root.after(0, lambda: self.progress_bar.config(value=5))
                self.root.after(0, lambda: self.status_label.config(text="正在排序章节..."))
                self.chapters = self.sort_chapters(self.chapters)
                print(f"章节排序完成,共 {len(self.chapters)}")
                self.root.after(0, lambda: self.progress_bar.config(value=8))
                print("准备更新章节树")
                self.root.after(0, self.update_chapter_tree)
                self.root.after(0, lambda: self.status_label.config(text="正在生成PDF..."))
                self.root.after(0, self.auto_generate_pdf)
                handed_off = True
            except Exception as e:
                message = f"爬取失败: {str(e)}"
                self.root.after(0, lambda: messagebox.showerror("错误", message))
            finally:
                # Resetting the UI here after a hand-off would hide the
                # progress bar and the stop button while the export runs.
                if not handed_off:
                    self.root.after(0, self.crawl_complete)

        threading.Thread(target=crawl_thread, daemon=True).start()

    def update_chapter_tree(self):
        """Rebuild the chapter list from ``self.chapters`` and show the first chapter."""
        print(f"update_chapter_tree 被调用,章节数: {len(self.chapters)}")
        for item in self.chapter_tree.get_children():
            self.chapter_tree.delete(item)
        for i, (title, _, _) in enumerate(self.chapters):
            self.chapter_tree.insert("", tk.END, text=title, values=(i,))
        print(f"章节树已更新,共 {len(self.chapters)}")
        self.crawl_btn.config(state=tk.NORMAL)
        # The opening 《 was missing, leaving an unbalanced 》 in the status bar.
        self.status_label.config(text=f"《{self.novel_name}》获取成功,共 {len(self.chapters)}")
        if self.chapters:
            self.chapter_tree.selection_set(self.chapter_tree.get_children()[0])
            self.show_chapter(0)

    def auto_generate_pdf(self):
        """Download every chapter and write ``download/<novel name>.pdf``.

        Must be called on the Tk thread; the work itself runs on a daemon
        thread. ``crawl_complete`` is invoked when the export ends for any
        reason (success, failure, stop request, missing reportlab).
        """
        try:
            from reportlab.pdfbase import pdfmetrics
            from reportlab.pdfbase.ttfonts import TTFont
            from reportlab.platypus import SimpleDocTemplate, Paragraph, PageBreak
            from reportlab.lib.styles import ParagraphStyle
            from reportlab.lib import colors
            from reportlab.lib.pagesizes import A4
            from reportlab.lib.units import inch
        except ImportError as e:
            msg = f"需要安装reportlab才能生成PDF\n错误: {e}\n请运行: pip install reportlab"
            self.status_label.config(text="需要安装reportlab")
            messagebox.showwarning("提示", msg)
            self.crawl_complete()
            return
        self.status_label.config(text=f"正在多线程获取内容...")

        def pdf_thread():
            try:
                from concurrent.futures import ThreadPoolExecutor, as_completed
                from xml.sax.saxutils import escape
                import urllib3
                urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
                total = len(self.chapters)
                # ThreadPoolExecutor rejects max_workers=0.
                max_workers = max(1, min(10, total))

                def fetch_content(args):
                    idx, chapter = args
                    try:
                        if self.stop_crawl_flag:
                            return idx, None
                        content = self.spider.get_content(chapter)
                        return idx, content
                    except Exception as e:
                        print(f"章节 {idx} 获取失败: {e}")
                        return idx, None

                self.root.after(0, lambda: self.status_label.config(text=f"多线程获取内容... (0/{total})"))
                print(f"开始多线程获取内容,共 {total} 章,使用 {max_workers} 个线程")
                with ThreadPoolExecutor(max_workers=max_workers) as executor:
                    futures = {}
                    for i, chapter in enumerate(self.chapters):
                        if self.stop_crawl_flag:
                            break
                        future = executor.submit(fetch_content, (i, chapter))
                        futures[future] = i
                        print(f"已提交任务 {i+1}/{total}")
                    completed = 0
                    for future in as_completed(futures):
                        if self.stop_crawl_flag:
                            # Drop queued downloads; leaving the ``with``
                            # block waits for the ones already running.
                            for pending in futures:
                                pending.cancel()
                            return
                        idx, content = future.result()
                        if content:
                            self.content_cache[idx] = content
                        completed += 1
                        progress = 10 + int(completed / total * 75)
                        self.root.after(0, lambda p=progress: self.progress_bar.config(value=p))
                        self.root.after(0, lambda p=progress, c=completed, t=total: self.status_label.config(
                            text=f"多线程获取内容... {p}% ({c}/{t})"))

                download_dir = os.path.join(os.getcwd(), "download")
                os.makedirs(download_dir, exist_ok=True)

                windows_dir = os.environ.get('WINDIR', 'C:\\Windows')
                possible_font_paths = [
                    'SimSun.ttf',
                    'simsun.ttc',
                    # 'C:\\' rather than 'C:': joining onto a bare drive
                    # letter yields a path relative to that drive's current
                    # directory.
                    os.path.join('C:\\', 'Windows', 'Fonts', 'simsun.ttc'),
                    os.path.join('C:\\', 'Windows', 'Fonts', 'SimSun.ttf'),
                    os.path.join(windows_dir, 'Fonts', 'simsun.ttc'),
                    os.path.join(windows_dir, 'Fonts', 'SimSun.ttf'),
                ]
                font_path = next((p for p in possible_font_paths if os.path.exists(p)), None)
                font_name = 'SimSun'
                if font_path:
                    pdfmetrics.registerFont(TTFont('SimSun', font_path))
                else:
                    # An unregistered 'SimSun' would abort the build, so fall
                    # back to reportlab's CID font for Simplified Chinese.
                    from reportlab.pdfbase.cidfonts import UnicodeCIDFont
                    font_name = 'STSong-Light'
                    pdfmetrics.registerFont(UnicodeCIDFont(font_name))
                    # Dialogs are shown from the Tk thread, not this worker.
                    self.root.after(0, lambda: messagebox.showwarning("警告", "未找到中文字体可能影响PDF生成"))

                # The novel name may hold characters that are illegal in file names.
                self.current_pdf_path = os.path.join(
                    download_dir, f"{self._safe_filename(self.novel_name)}.pdf")
                doc = SimpleDocTemplate(self.current_pdf_path, pagesize=A4, leftMargin=50, rightMargin=50,
                                        topMargin=50, bottomMargin=50)
                title_style = ParagraphStyle("BookTitle", fontSize=20, alignment=1,
                                             spaceAfter=30, fontName=font_name)
                chap_style = ParagraphStyle("ChapTitle", fontSize=14, spaceBefore=20,
                                            spaceAfter=10, fontName=font_name, textColor=colors.darkblue)
                txt_style = ParagraphStyle("Content", fontSize=11, leading=18,
                                           spaceAfter=6, fontName=font_name)

                # Paragraph parses its text as XML-like markup, so scraped
                # text is escaped to keep '<' and '&' from breaking the build.
                story = []
                story.append(Paragraph(escape(self.novel_name), title_style))
                story.append(PageBreak())
                self.root.after(0, lambda: self.status_label.config(text="正在生成PDF..."))
                for i, chapter_info in enumerate(self.chapters):
                    if self.stop_crawl_flag:
                        return
                    title, _, _ = chapter_info
                    if i in self.content_cache:
                        content = self.content_cache[i]
                    else:
                        # One sequential retry for chapters the pool missed.
                        content = self.spider.get_content(chapter_info)
                        if content:
                            self.content_cache[i] = content
                    chapter_para = Paragraph(escape(title), chap_style)
                    chapter_para._bookmarkName = title
                    chapter_para._bookmarkLevel = 0
                    story.append(chapter_para)
                    if content:
                        for line in content.split("\n"):
                            line = line.strip()
                            if line and line != title and title not in line:
                                story.append(Paragraph(escape(line), txt_style))
                    story.append(PageBreak())
                    progress = 85 + int((i+1)/len(self.chapters) * 10)
                    self.root.after(0, lambda p=progress: self.progress_bar.config(value=p))
                    self.root.after(0, lambda p=progress: self.status_label.config(text=f"正在生成PDF... {p}%"))

                def onFirstPage(canvas, doc):
                    canvas.saveState()
                    canvas.setFont(font_name, 9)
                    canvas.drawString(inch, 0.75 * inch, f"{self.novel_name}")
                    canvas.restoreState()

                def onLaterPages(canvas, doc):
                    canvas.saveState()
                    canvas.setFont(font_name, 9)
                    canvas.drawString(inch, 0.75 * inch, f"{doc.page}")
                    canvas.drawRightString(doc.pagesize[0] - inch, 0.75 * inch, f"{self.novel_name}")
                    canvas.restoreState()

                print("开始构建PDF文档...")
                doc.build(story, onFirstPage=onFirstPage, onLaterPages=onLaterPages)
                print("PDF文档构建完成")
                self.root.after(0, lambda: self.progress_bar.config(value=97))
                self.root.after(0, lambda: self.status_label.config(text="正在添加书签..."))
                print("开始添加书签...")
                self.add_pdf_bookmarks_simple(self.current_pdf_path, self.chapters)
                print("书签添加完成")
                self.root.after(0, lambda: self.progress_bar.config(value=100))
                self.root.after(0, lambda: self.status_label.config(text=f"PDF生成完成文件已保存"))
                saved_path = self.current_pdf_path
                self.root.after(0, lambda: messagebox.showinfo("成功", f"PDF生成完成\n文件位置: {saved_path}"))
            except Exception as e:
                print(f"PDF生成失败: {e}")
                # Built now: ``e`` is unbound once the except block ends.
                message = f"PDF生成失败: {str(e)}"
                self.root.after(0, lambda: messagebox.showerror("错误", message))
            finally:
                self.root.after(0, self.crawl_complete)

        threading.Thread(target=pdf_thread, daemon=True).start()

    def add_pdf_bookmarks(self, pdf_path, toc_entries):
        """Write a flat table of contents into *pdf_path* using PyMuPDF.

        Args:
            pdf_path: PDF file to rewrite in place.
            toc_entries: iterable of ``(title, estimated_page)`` pairs; the
                estimate is 1-based and used when the title is not found.
        """
        doc = None
        try:
            doc = fitz.open(pdf_path)
            print(f"PDF共 {doc.page_count} 页,需要添加 {len(toc_entries)} 个书签")
            # title -> zero-based page; at most one title is recorded per page.
            page_titles = {}
            for page_num in range(doc.page_count):
                page = doc.load_page(page_num)
                text = page.get_text()
                if page_num < 3:
                    print(f"{page_num+1} 页前500字符: {text[:500]}")
                for title, estimated_page in toc_entries:
                    if title in text and title not in page_titles:
                        page_titles[title] = page_num
                        break
            print(f"找到 {len(page_titles)} 个章节标题")
            toc = []
            for title, page_num in toc_entries:
                if title in page_titles:
                    actual_page = page_titles[title]
                else:
                    actual_page = max(0, page_num - 1)
                if actual_page < doc.page_count:
                    # set_toc rows are [level, title, 1-based page].
                    toc.append([1, title, actual_page + 1])
            if toc:
                doc.set_toc(toc)
            added_count = len(toc)
            print(f"已添加 {added_count} 个书签")
            temp_path = pdf_path + ".tmp"
            doc.save(temp_path)
            doc.close()
            doc = None
            import shutil
            shutil.move(temp_path, pdf_path)
            print(f"书签添加成功")
        except Exception as e:
            print(f"添加书签失败: {e}")
        finally:
            # Close the document when a failure skipped the close above.
            if doc is not None:
                doc.close()

    def add_pdf_bookmarks_simple(self, pdf_path, chapters):
        """Add one outline entry per chapter to *pdf_path* using pypdf.

        Each title is searched in the five pages after the previous hit,
        then in the rest of the document. Failures are printed, not raised.
        """
        try:
            from pypdf import PdfReader, PdfWriter
            reader = PdfReader(pdf_path)
            writer = PdfWriter()
            for page in reader.pages:
                writer.add_page(page)
            total_pages = len(reader.pages)
            print(f"PDF共有 {total_pages} 页,开始添加书签...")

            # Extracted text per page, so a title that is missing does not
            # re-extract the whole document for every later chapter.
            page_text_cache = {}

            def page_has(page_idx, title):
                if page_idx not in page_text_cache:
                    page_text_cache[page_idx] = reader.pages[page_idx].extract_text()
                text = page_text_cache[page_idx]
                return bool(text) and title in text

            # Page 0 is never searched.
            last_found_page = 1
            for idx, (title, _, _) in enumerate(chapters):
                window_end = min(last_found_page + 5, total_pages)
                page_idx = next(
                    (p for p in range(last_found_page, window_end) if page_has(p, title)), None)
                found_nearby = page_idx is not None
                if page_idx is None:
                    page_idx = next(
                        (p for p in range(window_end, total_pages) if page_has(p, title)), None)
                if page_idx is None:
                    continue
                try:
                    writer.add_outline_item(title, page_idx, parent=None)
                    if found_nearby:
                        print(f"为第{idx+1}'{title}' 添加书签到第{page_idx+1}")
                    else:
                        print(f"为第{idx+1}'{title}' 添加书签到第{page_idx+1}页(跨页查找)")
                    last_found_page = page_idx + 1
                except Exception as e:
                    # Report the failure and continue with the next chapter.
                    print(f"添加书签失败 '{title}': {e}")

            output_path = pdf_path + "_with_bookmarks.pdf"
            with open(output_path, "wb") as f:
                writer.write(f)
            import shutil
            shutil.move(output_path, pdf_path)
            print("书签添加完成!")
        except Exception as e:
            print(f"添加书签失败: {e}")

    def find_chapter_page(self, doc, chapter_title, estimated_page):
        """Return the zero-based page of *doc* whose text contains *chapter_title*.

        Pages around *estimated_page* are checked first, then the whole
        document. Falls back to ``estimated_page - 1`` (never below 0).
        """
        search_start = max(0, estimated_page - 2)
        search_end = min(doc.page_count, estimated_page + 2)
        for page_num in range(search_start, search_end):
            if chapter_title in doc.load_page(page_num).get_text():
                return page_num
        try:
            for page_num in range(doc.page_count):
                if chapter_title in doc.load_page(page_num).get_text():
                    return page_num
        except Exception as e:
            print(f"搜索章节页面失败: {e}")
        return estimated_page - 1 if estimated_page > 0 else 0

    def crawl_complete(self):
        """Hide the progress bar and restore the toolbar to its idle state."""
        self.progress_bar.stop()
        self.progress_bar.pack_forget()
        if self.crawl_btn['text'] == '停止爬取':
            self.crawl_btn.config(text='开始爬取')
        self.enable_inputs()

    def on_chapter_select(self, event):
        """Show the chapter whose row was selected, in the active reading mode."""
        selected = self.chapter_tree.selection()
        if not selected:
            return
        values = self.chapter_tree.item(selected[0], "values")
        # The startup hint row carries no index.
        if not values:
            return
        index = int(values[0])
        self.current_chapter = index
        if self.mode_var.get() == "text":
            self.show_chapter(index)
        else:
            self.show_pdf_chapter(index)

    def show_chapter(self, index):
        """Display chapter *index* as text, downloading it first when not cached."""
        if index < 0 or index >= len(self.chapters):
            return
        self.current_chapter = index
        chapter_info = self.chapters[index]
        title, _, _ = chapter_info
        total = len(self.chapters)
        if index in self.content_cache:
            content = self.content_cache[index]
            self.status_label.config(text=f"{index+1}/{total} 章:{title}")
            self.display_content(title, content)
            return
        self.status_label.config(text=f"正在加载第 {index+1} 章:{title}")

        def show_loaded(content):
            # Runs on the Tk thread; skipped if another chapter was chosen
            # while this one was downloading.
            if self.current_chapter != index:
                return
            self.display_content(title, content)
            self.status_label.config(text=f"{index+1}/{total} 章:{title}")

        def load_content():
            content = self.spider.get_content(chapter_info)
            # An empty result is not cached, so selecting the chapter again
            # retries the download.
            if content:
                self.content_cache[index] = content
            self.root.after(0, lambda: show_loaded(content))

        threading.Thread(target=load_content, daemon=True).start()

    def show_pdf_chapter(self, index):
        """Jump the PDF view to the first page whose text contains the chapter title."""
        if index < 0 or index >= len(self.chapters):
            return
        self.current_chapter = index
        title, _, _ = self.chapters[index]
        doc = self.pdf_frame.pdf_document
        if not doc:
            return
        for page_num in range(doc.page_count):
            if title in doc.load_page(page_num).get_text():
                self.pdf_frame.show_page(page_num)
                self.status_label.config(text=f"{index+1}/{len(self.chapters)} 章:{title}")
                return
        self.status_label.config(text=f"未找到章节 '{title}'")

    def display_content(self, title, content):
        """Replace the text view with *title* followed by *content*."""
        # The widget is left read-only after each update; a disabled Text
        # ignores insert and delete, so it is unlocked first.
        self.content_text.config(state=tk.NORMAL)
        self.content_text.delete("1.0", tk.END)
        self.content_text.insert(tk.END, f"{title}\n\n")
        self.content_text.insert(tk.END, content)
        self.content_text.config(state=tk.DISABLED)

    def _step_chapter(self, delta):
        """Move the selection by *delta* chapters and show it in the active mode."""
        target = self.current_chapter + delta
        if not 0 <= target < len(self.chapters):
            return
        self.current_chapter = target
        self.chapter_tree.selection_set(self.chapter_tree.get_children()[target])
        if self.mode_var.get() == "text":
            self.show_chapter(target)
        else:
            self.show_pdf_chapter(target)

    def prev_chapter(self):
        """Show the previous chapter, if any."""
        self._step_chapter(-1)

    def next_chapter(self):
        """Show the next chapter, if any."""
        self._step_chapter(1)

    def switch_mode(self):
        """Swap between the text reader and the PDF viewer per ``mode_var``."""
        mode = self.mode_var.get()
        if mode == "text":
            self.pdf_frame.pack_forget()
            # Same order as create_widgets: the chapter list must be packed
            # before the expanding text frame.
            self.left_frame.pack(fill=tk.Y, side=tk.LEFT)
            self.text_frame.pack(fill=tk.BOTH, expand=True)
            if self.chapters:
                self.show_chapter(self.current_chapter)
        else:
            self.text_frame.pack_forget()
            self.left_frame.pack_forget()
            self.pdf_frame.pack(fill=tk.BOTH, expand=True)
            if self.current_pdf_path and os.path.exists(self.current_pdf_path):
                self.pdf_frame.load_pdf(self.current_pdf_path)

    def open_pdf_file(self):
        """Ask for a PDF file and open it in PDF mode."""
        file_path = filedialog.askopenfilename(filetypes=[("PDF文件", "*.pdf")])
        if file_path:
            self.current_pdf_path = file_path
            self.mode_var.set("pdf")
            self.switch_mode()
if __name__ == "__main__":
    # Script entry point: create the Tk root window, attach the reader
    # application to it and run the event loop until the window closes.
    main_window = tk.Tk()
    reader_app = NovelReaderApp(main_window)
    main_window.mainloop()