# Listing metadata carried over from the code viewer: 986 lines, 42 KiB, Python.
import os
import re
import shutil
import sys
import threading
import tkinter as tk
from concurrent.futures import ThreadPoolExecutor, as_completed
from tkinter import ttk, messagebox, filedialog
from xml.sax.saxutils import escape

import fitz
import requests
from bs4 import BeautifulSoup


class NovelSpider:
    """HTTP scraper for chapter lists and chapter text on www.92yanqing.com.

    A chapter is represented as a ``(title, url, book_url)`` tuple: that is
    the shape returned by :meth:`get_chapters` and accepted by
    :meth:`get_content`.
    """

    BASE_URL = "https://www.92yanqing.com"

    # Upper bound on the number of pages followed for a single chapter, so a
    # site whose "next" links never terminate cannot keep a worker busy forever.
    MAX_PAGES_PER_CHAPTER = 100

    # Candidate containers for the chapter index, tried in this order.
    _CHAPTER_CONTAINERS = (
        ('div', {'class_': 'chapterlist'}),
        ('div', {'class_': 'listmain'}),
        ('div', {'id': 'list'}),
        ('ul', {'class_': 'chapterlist'}),
        ('div', {'class_': 'chapter'}),
    )

    # Site boilerplate removed from chapter text, in this order.
    _BOILERPLATE = (
        '本章未完,点击下一页继续阅读',
        '本章完',
        '请记住本书首发域名:www.92yanqing.com。',
        '92言情小说网',
        '最快更新无弹窗小说',
    )

    def __init__(self):
        """Create a pooled, retrying session with browser-like headers."""
        self.session = requests.Session()
        adapter = requests.adapters.HTTPAdapter(
            pool_connections=20,
            pool_maxsize=20,
            max_retries=2,
        )
        self.session.mount('http://', adapter)
        self.session.mount('https://', adapter)
        # NOTE(security): TLS certificate verification is switched off for
        # every request made through this session. Kept as in the original;
        # review whether the target site really requires it.
        self.session.verify = False
        self.session.headers.update({
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8',
            'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8',
            'Referer': 'https://www.92yanqing.com/',
            'Connection': 'keep-alive',
        })
        # Per-request timeout in seconds.
        self.timeout = 8

    @classmethod
    def _resolve_url(cls, href, book_url):
        """Turn *href* into an absolute URL.

        Absolute links are returned unchanged, root-relative links are joined
        to ``BASE_URL`` and any other link is appended to *book_url*.
        """
        if href.startswith('http'):
            return href
        if href.startswith('/'):
            return cls.BASE_URL + href
        return book_url.rstrip('/') + '/' + href

    @classmethod
    def _clean_page_text(cls, text, title):
        """Strip site boilerplate, blank lines and lines equal to *title*."""
        for phrase in cls._BOILERPLATE:
            text = text.replace(phrase, '')
        stripped = (raw.strip() for raw in text.split('\n'))
        return '\n'.join(line for line in stripped if line and line != title)

    def _fetch_soup(self, url):
        """Download *url* and parse it as UTF-8 HTML."""
        response = self.session.get(url, timeout=self.timeout)
        response.encoding = "utf-8"
        return BeautifulSoup(response.text, 'html.parser')

    def _find_chapter_container(self, soup):
        """Return the first known chapter-list container in *soup*, or None."""
        for tag, attrs in self._CHAPTER_CONTAINERS:
            container = soup.find(tag, **attrs)
            if container is not None:
                return container
        return None

    def _chapters_from_reader_page(self, soup, book_url):
        """Read chapters from the ``<select>`` on the "开始阅读" page.

        Used as a fallback when the book page itself lists few chapters.
        Failures are printed and whatever was collected so far is returned.
        """
        found = []
        start_link = soup.find('a', string='开始阅读')
        start_url = start_link.get('href') if start_link else None
        if not start_url:
            return found

        try:
            reader_soup = self._fetch_soup(self._resolve_url(start_url, book_url))
            chapter_select = reader_soup.find('select')
            if chapter_select is None:
                return found
            for option in chapter_select.find_all('option'):
                value = option.get('value')
                title = option.get_text(strip=True)
                if value and title and value != '#':
                    found.append((title, self._resolve_url(value, book_url), book_url))
        except Exception as e:
            print(f"尝试从开始阅读页面获取章节失败: {e}")
        return found

    def get_chapters(self, book_url):
        """Return the chapters of the book at *book_url*.

        Returns:
            A list of ``(title, url, book_url)`` tuples sorted by title, or an
            empty list when the page cannot be fetched or parsed.
        """
        try:
            soup = self._fetch_soup(book_url)

            chapters = []
            chapter_list = self._find_chapter_container(soup)
            if chapter_list is not None:
                for link in chapter_list.find_all('a', href=True):
                    href = link.get('href')
                    title = link.get_text(strip=True)
                    if href and title and '/read/' in href:
                        chapters.append((title, self._resolve_url(href, book_url), book_url))

            # Too few links on the index page: try the reader page's dropdown.
            if len(chapters) < 10:
                chapters.extend(self._chapters_from_reader_page(soup, book_url))

            # NOTE(review): this is a plain string sort of the titles, so
            # "第10章" orders before "第2章"; NovelReaderApp.sort_chapters
            # re-sorts by chapter number afterwards.
            chapters.sort(key=lambda x: x[0])

            print(f"获取到 {len(chapters)} 章")
            return chapters
        except Exception as e:
            print(f"获取章节失败: {e}")
            return []

    def get_content(self, chapter_info):
        """Download the text of one chapter, following its pagination links.

        Args:
            chapter_info: ``(title, url, book_url)`` tuple.

        Returns:
            The cleaned chapter text. When a request fails, the text gathered
            up to that point is returned (possibly an empty string).
        """
        title, url, book_url = chapter_info
        pages = []
        visited = set()

        try:
            # The visited set stops link cycles; the cap bounds runaway chains.
            while url and url not in visited and len(visited) < self.MAX_PAGES_PER_CHAPTER:
                visited.add(url)
                soup = self._fetch_soup(url)

                has_chapter_end = False
                content_div = soup.find('div', id='content') or soup.find('div', class_='content')
                if content_div is not None:
                    for node in content_div(['script', 'style']):
                        node.decompose()
                    text = content_div.get_text(separator='\n', strip=True)
                    # Must be tested before the marker is stripped as boilerplate.
                    has_chapter_end = '本章完' in text
                    pages.append(self._clean_page_text(text, title))

                next_link = soup.find('a', string='下一页') or soup.find('a', string='下一章')
                next_href = next_link.get('href') if next_link else None
                if has_chapter_end or not next_href:
                    break
                url = self._resolve_url(next_href, book_url)
        except Exception as e:
            print(f"获取内容失败: {e}")

        return ''.join(page + '\n\n' for page in pages).strip()
class PDFViewer(tk.Frame):
    """Frame that shows one PDF page at a time next to a bookmark tree.

    Pages are rendered with PyMuPDF (``fitz``) at 2x zoom and drawn on a
    scrollable canvas; rendered images are kept in ``page_images`` keyed by
    zero-based page number.
    """

    def __init__(self, parent):
        """Build the outline tree, the page canvas and the status label."""
        super().__init__(parent)
        self.parent = parent
        self.pdf_document = None
        self.current_page = 0
        self.total_pages = 0
        self.page_images = {}
        self.outline = []

        # Packed before the expanding paned window so it keeps its own strip.
        self.status_label = ttk.Label(self, text="")
        self.status_label.pack(side=tk.BOTTOM, fill=tk.X)

        self.paned_window = ttk.PanedWindow(self, orient=tk.HORIZONTAL)
        self.paned_window.pack(fill=tk.BOTH, expand=True)

        self.outline_frame = ttk.Frame(self, width=200, borderwidth=1, relief="solid")
        self.outline_frame.pack_propagate(False)

        # The hidden 'page' column stores each bookmark's zero-based page.
        self.outline_tree = ttk.Treeview(self.outline_frame, columns=('page',), show='tree')
        self.outline_tree.pack(fill=tk.BOTH, expand=True)
        self.outline_tree.bind('<<TreeviewSelect>>', self.on_outline_select)
        self.outline_tree.insert("", tk.END, text="请打开PDF文件")

        self.content_frame = ttk.Frame(self)
        self.content_frame.pack_propagate(False)

        self.canvas = tk.Canvas(self.content_frame, bg='white')
        self.scrollbar_y = ttk.Scrollbar(self.content_frame, orient=tk.VERTICAL, command=self.canvas.yview)
        self.scrollbar_x = ttk.Scrollbar(self.content_frame, orient=tk.HORIZONTAL, command=self.canvas.xview)
        self.canvas.config(yscrollcommand=self.scrollbar_y.set, xscrollcommand=self.scrollbar_x.set)

        # Scrollbars are packed before the expanding canvas; packing them
        # afterwards leaves them only the space the canvas did not claim.
        self.scrollbar_y.pack(side=tk.RIGHT, fill=tk.Y)
        self.scrollbar_x.pack(side=tk.BOTTOM, fill=tk.X)
        self.canvas.pack(side=tk.LEFT, fill=tk.BOTH, expand=True)

        # <MouseWheel> carries a delta; <Button-4>/<Button-5> carry a button number.
        self.canvas.bind('<MouseWheel>', self.on_mousewheel)
        self.canvas.bind('<Button-4>', self.on_mousewheel)
        self.canvas.bind('<Button-5>', self.on_mousewheel)

    def load_pdf(self, file_path):
        """Open *file_path*, rebuild the outline and show the first page.

        Returns:
            True on success; False after reporting the error in a dialog.
        """
        try:
            print(f"开始加载PDF: {file_path}")
            new_document = fitz.open(file_path)
            # Release the previously opened file before replacing it.
            if self.pdf_document is not None:
                self.pdf_document.close()
            self.pdf_document = new_document
            self.total_pages = len(self.pdf_document)
            self.current_page = 0
            self.page_images = {}
            print(f"PDF加载成功,共 {self.total_pages} 页")

            self.load_outline()

            self.show_page(0)
            self.update_status()
            return True
        except Exception as e:
            print(f"加载PDF失败: {e}")
            messagebox.showerror("错误", f"加载PDF失败: {str(e)}")
            return False

    def load_outline(self):
        """Fill the outline tree from the document's table of contents.

        Also (re-)adds the outline and content frames to the paned window.
        """
        for item in self.outline_tree.get_children():
            self.outline_tree.delete(item)

        self.outline = []
        try:
            toc = self.pdf_document.get_toc()
            print(f"获取到书签数量: {len(toc) if toc else 0}")

            if toc:
                # Maps a TOC level to the tree item most recently inserted at
                # that level; level 0 is the tree root.
                parent_map = {0: ""}
                count = 0
                for i, entry in enumerate(toc[:5]):
                    print(f"书签 {i}: {entry}")

                for entry in toc:
                    level, title, page_num = entry
                    parent = parent_map.get(level - 1, "")
                    parent_map.setdefault(level, parent)
                    try:
                        # TOC pages are 1-based; the viewer works 0-based.
                        item_id = self.outline_tree.insert(parent, tk.END, text=title, values=(page_num - 1,))
                        parent_map[level] = item_id
                        self.outline.append((title, page_num - 1))
                        count += 1
                    except Exception as insert_e:
                        print(f"插入书签失败 '{title}': {insert_e}")

                print(f"已加载 {count} 个书签")
                print(f"Treeview子节点数量: {len(self.outline_tree.get_children())}")
            else:
                self.outline_tree.insert("", tk.END, text="该PDF没有书签")
                print("PDF没有书签")

        except Exception as e:
            self.outline_tree.insert("", tk.END, text="加载书签失败")
            print(f"加载书签失败: {e}")

        for pane in list(self.paned_window.panes()):
            self.paned_window.forget(pane)

        self.paned_window.add(self.outline_frame, weight=1)
        self.paned_window.add(self.content_frame, weight=3)

    def on_outline_select(self, event):
        """Jump to the page stored on the selected outline row."""
        selected = self.outline_tree.selection()
        if not selected:
            return
        values = self.outline_tree.item(selected[0], "values")
        # Placeholder rows (e.g. "该PDF没有书签") carry no page value.
        if not values:
            return
        try:
            page_num = int(values[0])
        except (TypeError, ValueError):
            return
        self.show_page(page_num)

    def show_page(self, page_num):
        """Render (or reuse) zero-based page *page_num* and display it.

        Does nothing when no document is loaded or the page is out of range.
        """
        if not self.pdf_document or page_num < 0 or page_num >= self.total_pages:
            return

        self.current_page = page_num

        img = self.page_images.get(page_num)
        if img is None:
            page = self.pdf_document.load_page(page_num)
            pix = page.get_pixmap(matrix=fitz.Matrix(2, 2))
            img = tk.PhotoImage(data=pix.tobytes('ppm'))
            self.page_images[page_num] = img

        # Keep a reference on the canvas so the image is not garbage-collected.
        self.canvas.image = img
        self.canvas.delete('all')
        self.canvas.create_image(0, 0, anchor=tk.NW, image=img)
        self.canvas.config(scrollregion=self.canvas.bbox(tk.ALL))
        self.canvas.yview_moveto(0)
        self.canvas.xview_moveto(0)
        self.update_status()

    def update_status(self):
        """Show "current / total" in the status label, or clear it."""
        if self.pdf_document:
            self.status_label.config(text=f"第 {self.current_page + 1} / {self.total_pages} 页")
        else:
            self.status_label.config(text="")

    def on_mousewheel(self, event):
        """Scroll the canvas one unit up or down for a wheel event."""
        scrolling_up = event.num == 4 or event.delta > 0
        self.canvas.yview_scroll(-1 if scrolling_up else 1, 'units')

    def next_page(self):
        """Show the following page, if there is one."""
        if self.current_page < self.total_pages - 1:
            self.show_page(self.current_page + 1)

    def prev_page(self):
        """Show the preceding page, if there is one."""
        if self.current_page > 0:
            self.show_page(self.current_page - 1)
class NovelReaderApp:
    """Tk application that crawls a novel, shows it as text and builds a PDF.

    Chapters are ``(title, url, book_url)`` tuples produced by
    :class:`NovelSpider`; downloaded text is cached in ``content_cache`` keyed
    by chapter index.
    """

    BASE_URL = "https://www.92yanqing.com"

    # Chapter-number patterns used by sort_chapters, in priority order.
    _ARABIC_CHAPTER = re.compile(r'第(\d+)章')
    _CHINESE_CHAPTER = re.compile(r'第([零〇一二两三四五六七八九十百千万]+)章')
    _FALLBACK_NUMBERS = (re.compile(r'(\d+)、'), re.compile(r'(\d+) '))

    _CN_DIGITS = {
        '零': 0, '〇': 0, '一': 1, '二': 2, '两': 2, '三': 3, '四': 4,
        '五': 5, '六': 6, '七': 7, '八': 8, '九': 9,
    }
    _CN_UNITS = {'十': 10, '百': 100, '千': 1000}

    # Characters not allowed in file names on common platforms.
    _UNSAFE_FILENAME_CHARS = re.compile(r'[\\/:*?"<>|\r\n\t]')

    def __init__(self, root):
        """Store *root*, create the spider and build the widgets."""
        self.root = root
        self.root.title("小说阅读器")
        self.root.geometry("1200x800")

        self.spider = NovelSpider()
        self.chapters = []
        self.current_chapter = 0
        self.content_cache = {}
        self.novel_name = ""
        self.current_pdf_path = ""
        self.stop_crawl_flag = False

        self.create_widgets()

    def create_widgets(self):
        """Create the toolbar, chapter list, text view, PDF view and status bar."""
        self.top_frame = ttk.Frame(self.root, padding="10")
        self.top_frame.pack(fill=tk.X, side=tk.TOP)

        ttk.Label(self.top_frame, text="网址:").pack(side=tk.LEFT, padx=5)
        self.url_entry = ttk.Entry(self.top_frame, width=50)
        self.url_entry.pack(side=tk.LEFT, padx=5)

        ttk.Label(self.top_frame, text="小说名称:").pack(side=tk.LEFT, padx=5)
        self.name_entry = ttk.Entry(self.top_frame, width=30)
        self.name_entry.pack(side=tk.LEFT, padx=5)

        self.crawl_btn = ttk.Button(self.top_frame, text="开始爬取", command=self.start_crawl)
        self.crawl_btn.pack(side=tk.LEFT, padx=5)

        self.search_btn = ttk.Button(self.top_frame, text="搜索小说", command=self.search_novel)
        self.search_btn.pack(side=tk.LEFT, padx=5)

        self.open_pdf_btn = ttk.Button(self.top_frame, text="打开PDF", command=self.open_pdf_file)
        self.open_pdf_btn.pack(side=tk.LEFT, padx=5)

        self.mode_var = tk.StringVar(value="text")
        ttk.Radiobutton(self.top_frame, text="文本阅读", variable=self.mode_var, value="text",
                        command=self.switch_mode).pack(side=tk.LEFT, padx=5)
        ttk.Radiobutton(self.top_frame, text="PDF阅读", variable=self.mode_var, value="pdf",
                        command=self.switch_mode).pack(side=tk.LEFT, padx=5)

        # Only packed while a crawl is running (see start_crawl / crawl_complete).
        self.progress_bar = ttk.Progressbar(self.top_frame, mode='determinate', maximum=100, value=0)

        # Packed before the expanding main frame so it keeps its own strip.
        self.bottom_frame = ttk.Frame(self.root, padding="10")
        self.bottom_frame.pack(fill=tk.X, side=tk.BOTTOM)
        self.status_label = ttk.Label(self.bottom_frame, text="准备就绪")
        self.status_label.pack(side=tk.RIGHT)

        self.main_frame = ttk.Frame(self.root)
        self.main_frame.pack(fill=tk.BOTH, expand=True)

        self.left_frame = ttk.Frame(self.main_frame, width=250, borderwidth=1, relief="solid")
        self.left_frame.pack(fill=tk.Y, side=tk.LEFT)
        self.left_frame.pack_propagate(False)

        # The hidden 'index' column stores each row's chapter index.
        self.chapter_tree = ttk.Treeview(self.left_frame, columns=('index',), show='tree')
        self.chapter_tree.pack(fill=tk.BOTH, expand=True)
        self.chapter_tree.bind('<<TreeviewSelect>>', self.on_chapter_select)

        self.chapter_tree.insert("", tk.END, text="请输入小说网址并爬取")
        print("章节树已初始化,显示提示文本")

        self.text_frame = ttk.Frame(self.main_frame)
        self.text_frame.pack(fill=tk.BOTH, expand=True)

        self.content_text = tk.Text(self.text_frame, wrap=tk.WORD, font=('SimSun', 12))
        self.content_text.pack(fill=tk.BOTH, expand=True)

        # Created but not packed until PDF mode is selected.
        self.pdf_frame = PDFViewer(self.main_frame)

    # ------------------------------------------------------------------
    # Pure helpers
    # ------------------------------------------------------------------

    @classmethod
    def _chinese_to_int(cls, text):
        """Convert a Chinese numeral such as '一百零五' or '十二' to an int.

        *text* must consist only of the characters in ``_CN_DIGITS``,
        ``_CN_UNITS`` and '万'. A string without unit characters is read as
        positional digits ('二三' -> 23).
        """
        if not any(ch in cls._CN_UNITS or ch == '万' for ch in text):
            value = 0
            for ch in text:
                value = value * 10 + cls._CN_DIGITS[ch]
            return value

        total = section = digit = 0
        for ch in text:
            if ch in cls._CN_DIGITS:
                digit = cls._CN_DIGITS[ch]
            elif ch in cls._CN_UNITS:
                # A bare unit ('十') stands for one of that unit.
                section += (digit or 1) * cls._CN_UNITS[ch]
                digit = 0
            else:  # '万'
                total += (section + digit) * 10000
                section = digit = 0
        return total + section + digit

    @classmethod
    def _safe_filename(cls, name):
        """Return *name* with characters unsafe in file names replaced by '_'."""
        cleaned = cls._UNSAFE_FILENAME_CHARS.sub('_', name).strip(' .')
        return cleaned or 'novel'

    def sort_chapters(self, chapters):
        """Return *chapters* sorted by the chapter number found in each title.

        Recognises '第N章' with Arabic or Chinese numerals, then 'N、' and
        'N ' as fallbacks. Titles without a number sort as 0; the sort is
        stable, so ties keep their input order.
        """
        def chapter_number(title):
            match = self._ARABIC_CHAPTER.search(title)
            if match:
                return int(match.group(1))
            match = self._CHINESE_CHAPTER.search(title)
            if match:
                return self._chinese_to_int(match.group(1))
            for pattern in self._FALLBACK_NUMBERS:
                match = pattern.search(title)
                if match:
                    return int(match.group(1))
            return 0

        return sorted(chapters, key=lambda chapter: chapter_number(chapter[0]))

    # ------------------------------------------------------------------
    # Toolbar state
    # ------------------------------------------------------------------

    def _input_widgets(self):
        """Widgets that are locked while a crawl is running."""
        return (self.url_entry, self.name_entry, self.search_btn, self.open_pdf_btn)

    def disable_inputs(self):
        """Disable the URL/name entries and the search/open buttons."""
        for widget in self._input_widgets():
            widget.config(state=tk.DISABLED)

    def enable_inputs(self):
        """Re-enable the URL/name entries and the search/open buttons."""
        for widget in self._input_widgets():
            widget.config(state=tk.NORMAL)

    def _report_progress(self, value, text=None):
        """Set the progress bar value and, optionally, the status text."""
        self.progress_bar.config(value=value)
        if text is not None:
            self.status_label.config(text=text)

    # ------------------------------------------------------------------
    # Crawling
    # ------------------------------------------------------------------

    def start_crawl(self):
        """Start a crawl, or request a stop when one is already running."""
        if self.crawl_btn['text'] == '停止爬取':
            self.stop_crawl_flag = True
            self.crawl_btn.config(text='开始爬取')
            self.status_label.config(text="爬取已停止")
            self.enable_inputs()
            return

        book_url = self.url_entry.get().strip()
        self.novel_name = self.name_entry.get().strip()

        if not book_url:
            messagebox.showwarning("警告", "请输入网址")
            return

        if not self.novel_name:
            messagebox.showwarning("警告", "请输入小说名称")
            return

        self.stop_crawl_flag = False
        self.content_cache = {}
        self.chapters = []
        self.disable_inputs()
        self.crawl_btn.config(text='停止爬取')
        self.progress_bar.pack(side=tk.LEFT, padx=5, fill=tk.X, expand=True)

        self.start_crawl_thread()

    def search_novel(self):
        """Search the site for the entered novel name in a background thread."""
        novel_name = self.name_entry.get().strip()

        if not novel_name:
            messagebox.showwarning("警告", "请输入小说名称")
            return

        self.status_label.config(text=f"正在搜索《{novel_name}》...")

        def search_thread():
            try:
                # `params` lets requests URL-encode the query string.
                response = self.spider.session.get(
                    f"{self.BASE_URL}/s/", params={"searchkey": novel_name}, timeout=10)
                response.encoding = "utf-8"
                soup = BeautifulSoup(response.text, 'html.parser')

                book_items = (soup.find_all('div', class_='bookitem')
                              or soup.find_all('div', class_='search-item')
                              or soup.find_all('a', href=True))

                results = []
                for item in book_items:
                    # Result containers hold the link; bare anchors are the link.
                    links = [item] if item.name == 'a' else item.find_all('a', href=True)
                    for link in links:
                        title = link.get_text(strip=True)
                        href = link.get('href', '')
                        if not (title and href and '/read/' in href and novel_name in title):
                            continue
                        if not href.startswith('http'):
                            href = self.BASE_URL + href
                        if (title, href) not in results:
                            results.append((title, href))

                if not results:
                    self.root.after(0, lambda: messagebox.showwarning("提示", f"未找到《{novel_name}》相关小说"))
                    return

                self.root.after(0, lambda: self.show_search_results(results))

            except Exception as e:
                print(f"搜索失败: {e}")
                # Format now: `e` is unbound once the except block exits.
                message = f"搜索失败: {str(e)}"
                self.root.after(0, lambda: messagebox.showerror("错误", message))

        threading.Thread(target=search_thread, daemon=True).start()

    def show_search_results(self, results):
        """Show *results* (``(title, url)`` pairs) in a pick-one window."""
        search_window = tk.Toplevel(self.root)
        search_window.title("搜索结果")
        search_window.geometry("800x500")

        tree = ttk.Treeview(search_window, columns=('url',), show='tree')
        tree.pack(fill=tk.BOTH, expand=True)

        for title, url in results:
            tree.insert("", tk.END, text=title, values=(url,))

        def on_select(event):
            selected = tree.selection()
            if not selected:
                return
            item = selected[0]
            url = tree.item(item, "values")[0]
            title = tree.item(item, "text")
            self.url_entry.delete(0, tk.END)
            self.url_entry.insert(0, url)
            self.name_entry.delete(0, tk.END)
            self.name_entry.insert(0, title)
            search_window.destroy()

        tree.bind('<<TreeviewSelect>>', on_select)

        select_btn = ttk.Button(search_window, text="选择", command=lambda: on_select(None))
        select_btn.pack(pady=10)

        self.status_label.config(text="搜索完成")

    def start_crawl_thread(self):
        """Fetch and sort the chapter list in a worker thread.

        On success the chapter tree is refreshed and PDF generation is
        started; ``crawl_complete`` is then invoked by the PDF stage. On any
        other outcome it is invoked from here.
        """
        book_url = self.url_entry.get().strip()
        self.novel_name = self.name_entry.get().strip()

        def crawl_thread():
            handed_off = False
            try:
                self.root.after(0, self._report_progress, 0, "正在获取章节列表...")
                print("开始获取章节列表")
                self.chapters = self.spider.get_chapters(book_url)
                print(f"获取章节完成,共 {len(self.chapters)} 章")

                if self.stop_crawl_flag:
                    return

                if not self.chapters:
                    self.root.after(0, lambda: messagebox.showwarning("警告", "未能获取章节列表"))
                    return

                self.root.after(0, self._report_progress, 5, "正在排序章节...")
                self.chapters = self.sort_chapters(self.chapters)
                print(f"章节排序完成,共 {len(self.chapters)} 章")

                self.root.after(0, self._report_progress, 8)
                print("准备更新章节树")
                self.root.after(0, self.update_chapter_tree)

                self.root.after(0, lambda: self.status_label.config(text="正在生成PDF..."))
                self.root.after(0, self.auto_generate_pdf)
                handed_off = True

            except Exception as e:
                message = f"爬取失败: {str(e)}"
                self.root.after(0, lambda: messagebox.showerror("错误", message))
            finally:
                # The PDF stage owns completion once it has been scheduled.
                if not handed_off:
                    self.root.after(0, self.crawl_complete)

        threading.Thread(target=crawl_thread, daemon=True).start()

    def update_chapter_tree(self):
        """Replace the chapter tree rows with ``self.chapters`` and show the first."""
        print(f"update_chapter_tree 被调用,章节数: {len(self.chapters)}")
        for item in self.chapter_tree.get_children():
            self.chapter_tree.delete(item)

        for i, (title, _, _) in enumerate(self.chapters):
            self.chapter_tree.insert("", tk.END, text=title, values=(i,))
        print(f"章节树已更新,共 {len(self.chapters)} 章")

        self.crawl_btn.config(state=tk.NORMAL)
        self.status_label.config(text=f"《{self.novel_name}》获取成功,共 {len(self.chapters)} 章")

        if self.chapters:
            self.chapter_tree.selection_set(self.chapter_tree.get_children()[0])
            self.show_chapter(0)

    # ------------------------------------------------------------------
    # PDF generation
    # ------------------------------------------------------------------

    def _fetch_all_contents(self, chapters):
        """Download every chapter's text with a thread pool into the cache.

        Returns:
            False when a stop was requested, True otherwise.
        """
        total = len(chapters)
        max_workers = min(10, total)

        def fetch_content(idx, chapter):
            if self.stop_crawl_flag:
                return idx, None
            try:
                return idx, self.spider.get_content(chapter)
            except Exception as e:
                print(f"章节 {idx} 获取失败: {e}")
                return idx, None

        self.root.after(0, lambda: self.status_label.config(text=f"多线程获取内容... (0/{total})"))
        print(f"开始多线程获取内容,共 {total} 章,使用 {max_workers} 个线程")

        with ThreadPoolExecutor(max_workers=max_workers) as executor:
            futures = []
            for i, chapter in enumerate(chapters):
                if self.stop_crawl_flag:
                    break
                futures.append(executor.submit(fetch_content, i, chapter))
                print(f"已提交任务 {i+1}/{total}")

            completed = 0
            for future in as_completed(futures):
                if self.stop_crawl_flag:
                    # Drop work that has not started; running tasks finish.
                    for pending in futures:
                        pending.cancel()
                    return False

                idx, content = future.result()
                if content:
                    self.content_cache[idx] = content

                completed += 1
                progress = 10 + int(completed / total * 75)
                self.root.after(0, self._report_progress, progress,
                                f"多线程获取内容... {progress}% ({completed}/{total})")

        return not self.stop_crawl_flag

    def _register_pdf_font(self):
        """Register a font able to render Chinese and return its name.

        Tries SimSun from the working directory and the Windows font folder;
        when none can be registered, falls back to ReportLab's built-in
        'STSong-Light' CID font and warns the user.
        """
        from reportlab.pdfbase import pdfmetrics
        from reportlab.pdfbase.cidfonts import UnicodeCIDFont
        from reportlab.pdfbase.ttfonts import TTFont

        windir = os.environ.get('WINDIR', 'C:\\Windows')
        possible_font_paths = [
            'SimSun.ttf',
            'simsun.ttc',
            os.path.join('C:\\', 'Windows', 'Fonts', 'simsun.ttc'),
            os.path.join('C:\\', 'Windows', 'Fonts', 'SimSun.ttf'),
            os.path.join(windir, 'Fonts', 'simsun.ttc'),
            os.path.join(windir, 'Fonts', 'SimSun.ttf'),
        ]

        for path in possible_font_paths:
            if not os.path.exists(path):
                continue
            try:
                pdfmetrics.registerFont(TTFont('SimSun', path))
                return 'SimSun'
            except Exception as e:
                print(f"注册字体失败 {path}: {e}")

        # Dialogs must be opened from the Tk thread, not from this worker.
        self.root.after(0, lambda: messagebox.showwarning("警告", "未找到中文字体,可能影响PDF生成"))
        pdfmetrics.registerFont(UnicodeCIDFont('STSong-Light'))
        return 'STSong-Light'

    def auto_generate_pdf(self):
        """Download all chapters and write ``download/<novel name>.pdf``.

        Must be called on the Tk thread; the work itself runs in a background
        thread, which calls ``crawl_complete`` when it ends for any reason.
        """
        try:
            from reportlab.lib import colors
            from reportlab.lib.pagesizes import A4
            from reportlab.lib.styles import ParagraphStyle
            from reportlab.lib.units import inch
            from reportlab.pdfbase import cidfonts, pdfmetrics, ttfonts  # noqa: F401 (availability check)
            from reportlab.platypus import SimpleDocTemplate, Paragraph, PageBreak
        except ImportError as e:
            msg = f"需要安装reportlab才能生成PDF\n错误: {e}\n请运行: pip install reportlab"
            self.status_label.config(text="需要安装reportlab")
            messagebox.showwarning("提示", msg)
            self.crawl_complete()
            return

        self.status_label.config(text=f"正在多线程获取内容...")

        def pdf_thread():
            notify = None  # dialog to show on the Tk thread once finished
            try:
                import urllib3
                # The spider's session disables certificate verification;
                # silence the per-request warning that would otherwise result.
                urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

                chapters = list(self.chapters)
                if not chapters:
                    return
                if not self._fetch_all_contents(chapters):
                    return

                download_dir = os.path.join(os.getcwd(), "download")
                os.makedirs(download_dir, exist_ok=True)

                font_name = self._register_pdf_font()

                self.current_pdf_path = os.path.join(
                    download_dir, f"{self._safe_filename(self.novel_name)}.pdf")

                pdf_doc = SimpleDocTemplate(self.current_pdf_path, pagesize=A4, leftMargin=50, rightMargin=50,
                                            topMargin=50, bottomMargin=50)

                # wordWrap='CJK' lets ReportLab break lines inside Chinese
                # text, which contains no spaces to break on.
                title_style = ParagraphStyle("BookTitle", fontSize=20, alignment=1,
                                             spaceAfter=30, fontName=font_name, wordWrap='CJK')
                chap_style = ParagraphStyle("ChapTitle", fontSize=14, spaceBefore=20,
                                            spaceAfter=10, fontName=font_name, textColor=colors.darkblue,
                                            wordWrap='CJK')
                txt_style = ParagraphStyle("Content", fontSize=11, leading=18,
                                           spaceAfter=6, fontName=font_name, wordWrap='CJK')

                # Paragraph text is parsed as XML-like markup, so scraped text
                # is escaped before being handed to ReportLab.
                story = [Paragraph(escape(self.novel_name), title_style), PageBreak()]

                self.root.after(0, lambda: self.status_label.config(text="正在生成PDF..."))

                for i, chapter_info in enumerate(chapters):
                    if self.stop_crawl_flag:
                        return

                    title = chapter_info[0]

                    if i in self.content_cache:
                        content = self.content_cache[i]
                    else:
                        # Not fetched by the pool (failed or empty): retry once here.
                        content = self.spider.get_content(chapter_info)
                        self.content_cache[i] = content

                    story.append(Paragraph(escape(title), chap_style))

                    if content:
                        for line in content.split("\n"):
                            line = line.strip()
                            # Skip blank lines and lines repeating the title.
                            if line and title not in line:
                                story.append(Paragraph(escape(line), txt_style))
                    story.append(PageBreak())

                    progress = 85 + int((i + 1) / len(chapters) * 10)
                    self.root.after(0, self._report_progress, progress, f"正在生成PDF... {progress}%")

                def on_first_page(canvas, doc):
                    canvas.saveState()
                    canvas.setFont(font_name, 9)
                    canvas.drawString(inch, 0.75 * inch, f"《{self.novel_name}》")
                    canvas.restoreState()

                def on_later_pages(canvas, doc):
                    canvas.saveState()
                    canvas.setFont(font_name, 9)
                    canvas.drawString(inch, 0.75 * inch, f"第 {doc.page} 页")
                    canvas.drawRightString(doc.pagesize[0] - inch, 0.75 * inch, f"《{self.novel_name}》")
                    canvas.restoreState()

                print("开始构建PDF文档...")
                pdf_doc.build(story, onFirstPage=on_first_page, onLaterPages=on_later_pages)
                print("PDF文档构建完成")

                self.root.after(0, self._report_progress, 97, "正在添加书签...")

                print("开始添加书签...")
                self.add_pdf_bookmarks_simple(self.current_pdf_path, chapters)
                print("书签添加完成")

                self.root.after(0, self._report_progress, 100, f"PDF生成完成!文件已保存")
                notify = lambda: messagebox.showinfo("成功", f"PDF生成完成!\n文件位置: {self.current_pdf_path}")

            except Exception as e:
                print(f"PDF生成失败: {e}")
                # Format now: `e` is unbound once the except block exits.
                message = f"PDF生成失败: {str(e)}"
                notify = lambda: messagebox.showerror("错误", message)
            finally:
                # Restore the toolbar first so the modal dialog does not delay it.
                self.root.after(0, self.crawl_complete)
                if notify is not None:
                    self.root.after(0, notify)

        threading.Thread(target=pdf_thread, daemon=True).start()

    def add_pdf_bookmarks(self, pdf_path, toc_entries):
        """Add one top-level bookmark per entry to the PDF using PyMuPDF.

        Args:
            pdf_path: PDF to update in place (written via ``<pdf_path>.tmp``).
            toc_entries: iterable of ``(title, estimated_page)`` pairs; the
                estimate (1-based) is used when the title is not found in the
                text of any page.

        Errors are printed, not raised.
        """
        temp_path = pdf_path + ".tmp"
        try:
            doc = fitz.open(pdf_path)
            try:
                print(f"PDF共 {doc.page_count} 页,需要添加 {len(toc_entries)} 个书签")

                # First page on which each title appears (one title per page).
                page_titles = {}
                for page_num in range(doc.page_count):
                    text = doc.load_page(page_num).get_text()
                    if page_num < 3:
                        print(f"第 {page_num+1} 页前500字符: {text[:500]}")
                    for title, _estimated_page in toc_entries:
                        if title in text and title not in page_titles:
                            page_titles[title] = page_num
                            break

                print(f"找到 {len(page_titles)} 个章节标题")

                toc = doc.get_toc()
                added_count = 0
                for title, page_num in toc_entries:
                    actual_page = page_titles.get(title, max(0, page_num - 1))
                    if actual_page < doc.page_count:
                        # TOC rows are [level, title, 1-based page].
                        toc.append([1, title, actual_page + 1])
                        added_count += 1
                doc.set_toc(toc)

                print(f"已添加 {added_count} 个书签")

                doc.save(temp_path)
            finally:
                doc.close()

            shutil.move(temp_path, pdf_path)

            print(f"书签添加成功")
        except Exception as e:
            print(f"添加书签失败: {e}")

    def add_pdf_bookmarks_simple(self, pdf_path, chapters):
        """Add one bookmark per chapter to the PDF using pypdf.

        Chapters are located by searching page text for the chapter title,
        starting after the page where the previous chapter was found (page
        index 1 for the first chapter). Chapters whose title is not found get
        no bookmark. Errors are printed, not raised.
        """
        try:
            from pypdf import PdfReader, PdfWriter

            reader = PdfReader(pdf_path)
            writer = PdfWriter()

            for page in reader.pages:
                writer.add_page(page)

            total_pages = len(reader.pages)
            print(f"PDF共有 {total_pages} 页,开始添加书签...")

            # Text extraction is slow; extract each page at most once.
            page_texts = {}

            def page_text(page_idx):
                if page_idx not in page_texts:
                    page_texts[page_idx] = reader.pages[page_idx].extract_text() or ""
                return page_texts[page_idx]

            last_found_page = 1

            for idx, (title, _, _) in enumerate(chapters):
                search_start = last_found_page
                for page_idx in range(search_start, total_pages):
                    if title not in page_text(page_idx):
                        continue
                    try:
                        writer.add_outline_item(title, page_idx, parent=None)
                        # Matches beyond the next five pages are flagged in the log.
                        suffix = "" if page_idx < search_start + 5 else "(跨页查找)"
                        print(f"为第{idx+1}章 '{title}' 添加书签到第{page_idx+1}页{suffix}")
                        last_found_page = page_idx + 1
                    except Exception as e:
                        print(f"添加书签失败 '{title}': {e}")
                    break

            output_path = pdf_path + "_with_bookmarks.pdf"
            with open(output_path, "wb") as f:
                writer.write(f)

            shutil.move(output_path, pdf_path)

            print("书签添加完成!")
        except Exception as e:
            print(f"添加书签失败: {e}")

    def find_chapter_page(self, doc, chapter_title, estimated_page):
        """Return the zero-based page of *doc* whose text contains the title.

        Pages around *estimated_page* are checked first, then the whole
        document. When nothing matches, ``estimated_page - 1`` (floored at 0)
        is returned.
        """
        search_start = max(0, estimated_page - 2)
        search_end = min(doc.page_count, estimated_page + 2)

        for page_num in range(search_start, search_end):
            if chapter_title in doc.load_page(page_num).get_text():
                return page_num

        try:
            for page_num in range(doc.page_count):
                if chapter_title in doc.load_page(page_num).get_text():
                    return page_num
        except Exception as e:
            print(f"搜索章节页面失败: {e}")

        return estimated_page - 1 if estimated_page > 0 else 0

    def crawl_complete(self):
        """Hide the progress bar and return the toolbar to its idle state."""
        self.progress_bar.stop()
        self.progress_bar.pack_forget()
        if self.crawl_btn['text'] == '停止爬取':
            self.crawl_btn.config(text='开始爬取')
        self.enable_inputs()

    # ------------------------------------------------------------------
    # Reading
    # ------------------------------------------------------------------

    def on_chapter_select(self, event):
        """Show the chapter whose row was selected, in the current mode."""
        selected = self.chapter_tree.selection()
        if not selected:
            return

        values = self.chapter_tree.item(selected[0], "values")
        # The initial hint row carries no chapter index.
        if not values:
            return
        index = int(values[0])
        self.current_chapter = index

        if self.mode_var.get() == "text":
            self.show_chapter(index)
        else:
            self.show_pdf_chapter(index)

    def show_chapter(self, index):
        """Display chapter *index* as text, downloading it if not cached."""
        if index < 0 or index >= len(self.chapters):
            return

        self.current_chapter = index
        chapter = self.chapters[index]
        title = chapter[0]

        if index in self.content_cache:
            content = self.content_cache[index]
            self.status_label.config(text=f"第 {index+1}/{len(self.chapters)} 章:{title}")
            self.display_content(title, content)
            return

        self.status_label.config(text=f"正在加载第 {index+1} 章:{title}")

        def on_loaded(content):
            # Runs on the Tk thread. Ignore results for a chapter list that
            # has been replaced since the download started.
            if index >= len(self.chapters) or self.chapters[index] is not chapter:
                return
            if content:
                # Empty results are not cached so a later attempt can retry.
                self.content_cache[index] = content
            # The user may have moved on to another chapter meanwhile.
            if self.current_chapter != index:
                return
            self.display_content(title, content)
            self.status_label.config(text=f"第 {index+1}/{len(self.chapters)} 章:{title}")

        def load_content():
            content = self.spider.get_content(chapter)
            self.root.after(0, on_loaded, content)

        threading.Thread(target=load_content, daemon=True).start()

    def show_pdf_chapter(self, index):
        """Show the first PDF page whose text contains chapter *index*'s title."""
        if index < 0 or index >= len(self.chapters):
            return

        self.current_chapter = index
        title, _, _ = self.chapters[index]

        doc = self.pdf_frame.pdf_document
        if doc:
            for page_num in range(doc.page_count):
                if title in doc.load_page(page_num).get_text():
                    self.pdf_frame.show_page(page_num)
                    self.status_label.config(text=f"第 {index+1}/{len(self.chapters)} 章:{title}")
                    return

        self.status_label.config(text=f"未找到章节 '{title}'")

    def display_content(self, title, content):
        """Replace the text view's contents with *title* and *content*."""
        # The widget is left read-only after each update, so it has to be
        # made editable again before it can be cleared and refilled.
        self.content_text.config(state=tk.NORMAL)
        self.content_text.delete(1.0, tk.END)
        self.content_text.insert(tk.END, f"{title}\n\n")
        self.content_text.insert(tk.END, content)
        self.content_text.config(state=tk.DISABLED)

    def _go_to_chapter(self, index):
        """Select row *index* in the chapter tree and show it in the current mode."""
        self.current_chapter = index
        self.chapter_tree.selection_set(self.chapter_tree.get_children()[index])
        if self.mode_var.get() == "text":
            self.show_chapter(index)
        else:
            self.show_pdf_chapter(index)

    def prev_chapter(self):
        """Move to the previous chapter, if there is one."""
        if self.current_chapter > 0:
            self._go_to_chapter(self.current_chapter - 1)

    def next_chapter(self):
        """Move to the next chapter, if there is one."""
        if self.current_chapter < len(self.chapters) - 1:
            self._go_to_chapter(self.current_chapter + 1)

    def switch_mode(self):
        """Swap between the text reader and the PDF viewer."""
        mode = self.mode_var.get()
        if mode == "text":
            self.pdf_frame.pack_forget()
            # Re-pack both panes from scratch: the chapter list has to be
            # packed before the expanding text frame to keep its left column.
            self.text_frame.pack_forget()
            self.left_frame.pack_forget()
            self.left_frame.pack(fill=tk.Y, side=tk.LEFT)
            self.text_frame.pack(fill=tk.BOTH, expand=True)
            if self.chapters:
                self.show_chapter(self.current_chapter)
        else:
            self.text_frame.pack_forget()
            self.left_frame.pack_forget()
            self.pdf_frame.pack(fill=tk.BOTH, expand=True)
            if self.current_pdf_path and os.path.exists(self.current_pdf_path):
                self.pdf_frame.load_pdf(self.current_pdf_path)

    def open_pdf_file(self):
        """Ask for a PDF file and open it in PDF mode."""
        file_path = filedialog.askopenfilename(filetypes=[("PDF文件", "*.pdf")])
        if file_path:
            self.current_pdf_path = file_path
            self.mode_var.set("pdf")
            self.switch_mode()


# Script entry point: build the main window and run the Tk event loop.
if __name__ == "__main__":
    root = tk.Tk()
    app = NovelReaderApp(root)
    root.mainloop()