"""Tkinter novel reader.

Scrapes a novel's chapter list and text from www.92yanqing.com, lets the user
read chapters as plain text, exports the whole book to a bookmarked PDF and
shows PDFs in an embedded viewer.
"""

import os
import re
import shutil
import sys
import threading
import tkinter as tk
from tkinter import ttk, messagebox, filedialog

import requests
from bs4 import BeautifulSoup
import fitz

# Site root used to resolve root-relative links.
BASE_URL = "https://www.92yanqing.com"

# Containers that may hold the chapter list, tried in this order.
_CHAPTER_LIST_SELECTORS = (
    ('div', {'class_': 'chapterlist'}),
    ('div', {'class_': 'listmain'}),
    ('div', {'id': 'list'}),
    ('ul', {'class_': 'chapterlist'}),
    ('div', {'class_': 'chapter'}),
)

# Site boilerplate removed from chapter text, in this order.
_BOILERPLATE = (
    '本章未完,点击下一页继续阅读',
    '本章完',
    '请记住本书首发域名:www.92yanqing.com。',
    '92言情小说网',
    '最快更新无弹窗小说',
)

# Patterns used to pull a chapter number out of a title, tried in this order.
_CHAPTER_NUMBER_PATTERNS = (
    re.compile(r'第(\d+)章'),
    re.compile(r'(\d+)、'),
    re.compile(r'(\d+) '),
)


def _safe_filename(name):
    """Return *name* with characters that are invalid in file names replaced."""
    cleaned = re.sub(r'[\\/:*?"<>|]', '_', name).strip()
    return cleaned or "novel"


class NovelSpider:
    """HTTP scraper for chapter lists and chapter text."""

    def __init__(self):
        self.session = requests.Session()
        adapter = requests.adapters.HTTPAdapter(
            pool_connections=20,
            pool_maxsize=20,
            max_retries=2,
        )
        self.session.mount('http://', adapter)
        self.session.mount('https://', adapter)
        # NOTE(review): TLS certificate verification is disabled for every
        # request made through this session; confirm this is still required.
        self.session.verify = False
        self.session.headers.update({
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8',
            'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8',
            'Referer': 'https://www.92yanqing.com/',
            'Connection': 'keep-alive',
        })
        self.timeout = 8  # seconds, applied to every request

    @staticmethod
    def _absolute_url(href, book_url):
        """Resolve *href* against the site root or the book page URL."""
        if href.startswith('http'):
            return href
        if href.startswith('/'):
            return BASE_URL + href
        return book_url.rstrip('/') + '/' + href

    def _fetch_soup(self, url):
        """Download *url* and return it parsed, decoding the body as UTF-8."""
        response = self.session.get(url, timeout=self.timeout)
        response.encoding = "utf-8"
        return BeautifulSoup(response.text, 'html.parser')

    def _chapters_from_reader_page(self, soup, book_url):
        """Collect chapters from the <select> on the '开始阅读' page.

        Returns whatever was collected before a failure; errors are printed.
        """
        start_link = soup.find('a', text='开始阅读')
        if start_link is None:
            return []
        start_url = start_link.get('href')
        if not start_url:
            return []

        found = []
        try:
            reader_soup = self._fetch_soup(self._absolute_url(start_url, book_url))
            chapter_select = reader_soup.find('select')
            if chapter_select is not None:
                for option in chapter_select.find_all('option'):
                    value = option.get('value')
                    title = option.get_text(strip=True)
                    if value and title and value != '#':
                        found.append((title, self._absolute_url(value, book_url), book_url))
        except Exception as e:
            print(f"尝试从开始阅读页面获取章节失败: {e}")
        return found

    def get_chapters(self, book_url):
        """Return a list of ``(title, url, book_url)`` tuples for *book_url*.

        Links are taken from the first matching chapter-list container; when
        fewer than 10 are found, the reader page's <select> is used as well.
        The result is sorted by title. Any failure is printed and yields ``[]``.
        """
        try:
            soup = self._fetch_soup(book_url)
            chapters = []

            chapter_list = None
            for tag, attrs in _CHAPTER_LIST_SELECTORS:
                chapter_list = soup.find(tag, **attrs)
                if chapter_list is not None:
                    break

            if chapter_list is not None:
                for link in chapter_list.find_all('a', href=True):
                    href = link.get('href')
                    title = link.get_text(strip=True)
                    if href and title and '/read/' in href:
                        chapters.append((title, self._absolute_url(href, book_url), book_url))

            if len(chapters) < 10:
                chapters.extend(self._chapters_from_reader_page(soup, book_url))

            chapters.sort(key=lambda item: item[0])
            print(f"获取到 {len(chapters)} 章")
            return chapters
        except Exception as e:
            print(f"获取章节失败: {e}")
            return []

    def get_content(self, chapter_info):
        """Return the text of one chapter, following its pagination links.

        *chapter_info* is a ``(title, url, book_url)`` tuple. Pages are followed
        through '下一页' / '下一章' links until a page contains '本章完', no
        link is left, or a URL repeats. On error the text gathered so far is
        returned.
        """
        title, url, book_url = chapter_info
        parts = []
        has_chapter_end = False
        visited = set()  # guards against pagination links that form a cycle
        try:
            while url and url not in visited:
                visited.add(url)
                soup = self._fetch_soup(url)
                content_div = soup.find('div', id='content') or soup.find('div', class_='content')
                if content_div:
                    for node in content_div(['script', 'style']):
                        node.decompose()
                    text = content_div.get_text(separator='\n', strip=True)
                    # Must be tested before the marker itself is stripped below.
                    has_chapter_end = '本章完' in text
                    for boilerplate in _BOILERPLATE:
                        text = text.replace(boilerplate, '')
                    stripped = (line.strip() for line in text.split('\n'))
                    kept = [line for line in stripped if line and line != title]
                    parts.append('\n'.join(kept) + '\n\n')

                next_page = None
                next_link = soup.find('a', text='下一页') or soup.find('a', text='下一章')
                if next_link:
                    next_href = next_link.get('href')
                    if next_href:
                        next_page = self._absolute_url(next_href, book_url)

                if has_chapter_end or not next_page:
                    break
                url = next_page
            return ''.join(parts).strip()
        except Exception as e:
            print(f"获取内容失败: {e}")
            return ''.join(parts)


class PDFViewer(tk.Frame):
    """Frame showing one PDF page at a time next to the document outline."""

    def __init__(self, parent):
        super().__init__(parent)
        self.parent = parent
        self.pdf_document = None
        self.current_page = 0
        self.total_pages = 0
        self.page_images = {}  # page index -> tk.PhotoImage
        self.outline = []      # (title, page index) pairs

        self.paned_window = ttk.PanedWindow(self, orient=tk.HORIZONTAL)
        self.paned_window.pack(fill=tk.BOTH, expand=True)

        self.outline_frame = ttk.Frame(self, width=200, borderwidth=1, relief="solid")
        self.outline_frame.pack_propagate(False)
        # A hidden column carries the target page index of each bookmark.
        self.outline_tree = ttk.Treeview(self.outline_frame, columns=('page',), show='tree')
        self.outline_tree.pack(fill=tk.BOTH, expand=True)
        self.outline_tree.bind('<<TreeviewSelect>>', self.on_outline_select)
        self.outline_tree.insert("", tk.END, text="请打开PDF文件")

        self.content_frame = ttk.Frame(self)
        self.content_frame.pack_propagate(False)
        self.canvas = tk.Canvas(self.content_frame, bg='white')
        self.scrollbar_y = ttk.Scrollbar(self.content_frame, orient=tk.VERTICAL,
                                         command=self.canvas.yview)
        self.scrollbar_x = ttk.Scrollbar(self.content_frame, orient=tk.HORIZONTAL,
                                         command=self.canvas.xview)
        # Scrollbars are packed before the expanding canvas so that they keep
        # their edge of the frame.
        self.scrollbar_x.pack(side=tk.BOTTOM, fill=tk.X)
        self.scrollbar_y.pack(side=tk.RIGHT, fill=tk.Y)
        self.canvas.pack(fill=tk.BOTH, expand=True)
        self.canvas.config(yscrollcommand=self.scrollbar_y.set,
                           xscrollcommand=self.scrollbar_x.set)
        # <MouseWheel> carries event.delta; <Button-4>/<Button-5> are the X11
        # wheel buttons, which on_mousewheel tells apart through event.num.
        for sequence in ('<MouseWheel>', '<Button-4>', '<Button-5>'):
            self.canvas.bind(sequence, self.on_mousewheel)

        self.status_label = ttk.Label(self, text="")
        self.status_label.pack(side=tk.BOTTOM, fill=tk.X)

    def load_pdf(self, file_path):
        """Open *file_path* and show its first page.

        Returns True on success; on failure shows an error box and returns
        False, leaving any previously loaded document in place.
        """
        try:
            print(f"开始加载PDF: {file_path}")
            document = fitz.open(file_path)
            # Release the previous document only once the new one is open.
            if self.pdf_document is not None:
                try:
                    self.pdf_document.close()
                except Exception as close_error:
                    print(f"关闭PDF失败: {close_error}")
            self.pdf_document = document
            self.total_pages = len(document)
            self.current_page = 0
            self.page_images = {}
            print(f"PDF加载成功,共 {self.total_pages} 页")
            self.load_outline()
            self.show_page(0)
            self.update_status()
            return True
        except Exception as e:
            print(f"加载PDF失败: {e}")
            messagebox.showerror("错误", f"加载PDF失败: {str(e)}")
            return False

    def load_outline(self):
        """Rebuild the outline tree from the document's table of contents."""
        for item in self.outline_tree.get_children():
            self.outline_tree.delete(item)
        self.outline = []
        try:
            toc = self.pdf_document.get_toc()
            print(f"获取到书签数量: {len(toc) if toc else 0}")
            if toc:
                parent_map = {0: ""}  # outline level -> most recent item at that level
                count = 0
                for i, entry in enumerate(toc[:5]):
                    print(f"书签 {i}: {entry}")
                for entry in toc:
                    level, title, page_num = entry
                    if level not in parent_map:
                        parent_map[level] = parent_map.get(level - 1, "")
                    parent = parent_map.get(level - 1, "")
                    try:
                        # The TOC is 1-based; show_page expects a 0-based index.
                        item_id = self.outline_tree.insert(parent, tk.END, text=title,
                                                           values=(page_num - 1,))
                        parent_map[level] = item_id
                        self.outline.append((title, page_num - 1))
                        count += 1
                    except Exception as insert_e:
                        print(f"插入书签失败 '{title}': {insert_e}")
                print(f"已加载 {count} 个书签")
                print(f"Treeview子节点数量: {len(self.outline_tree.get_children())}")
            else:
                self.outline_tree.insert("", tk.END, text="该PDF没有书签")
                print("PDF没有书签")
        except Exception as e:
            self.outline_tree.insert("", tk.END, text="加载书签失败")
            print(f"加载书签失败: {e}")

        # Re-add both panes so the outline sits to the left of the page.
        for pane in list(self.paned_window.panes()):
            self.paned_window.forget(pane)
        self.paned_window.add(self.outline_frame, weight=1)
        self.paned_window.add(self.content_frame, weight=3)

    def on_outline_select(self, event):
        """Jump to the page of the selected bookmark."""
        selected = self.outline_tree.selection()
        if not selected:
            return
        values = self.outline_tree.item(selected[0], "values")
        if not values:
            return  # placeholder rows carry no page index
        self.show_page(int(values[0]))

    def show_page(self, page_num):
        """Render page *page_num* (0-based) at 2x zoom; out-of-range is ignored."""
        if not self.pdf_document or page_num < 0 or page_num >= self.total_pages:
            return
        self.current_page = page_num
        img = self.page_images.get(page_num)
        if img is None:
            page = self.pdf_document.load_page(page_num)
            pix = page.get_pixmap(matrix=fitz.Matrix(2, 2))
            img = tk.PhotoImage(data=pix.tobytes('ppm'))
            self.page_images[page_num] = img
        # Keep a reference on the canvas so the image is not garbage collected.
        self.canvas.image = img
        self.canvas.delete('all')
        self.canvas.create_image(0, 0, anchor=tk.NW, image=img)
        self.canvas.config(scrollregion=self.canvas.bbox(tk.ALL))
        self.canvas.yview_moveto(0)
        self.canvas.xview_moveto(0)
        self.update_status()

    def update_status(self):
        """Show 'current / total' in the status label, or clear it."""
        if self.pdf_document:
            self.status_label.config(text=f"第 {self.current_page + 1} / {self.total_pages} 页")
        else:
            self.status_label.config(text="")

    def on_mousewheel(self, event):
        """Scroll the page vertically by one unit per wheel event."""
        if event.num == 4 or event.delta > 0:
            self.canvas.yview_scroll(-1, 'units')
        else:
            self.canvas.yview_scroll(1, 'units')

    def next_page(self):
        """Show the following page, if there is one."""
        if self.current_page < self.total_pages - 1:
            self.show_page(self.current_page + 1)

    def prev_page(self):
        """Show the preceding page, if there is one."""
        if self.current_page > 0:
            self.show_page(self.current_page - 1)


class NovelReaderApp:
    """Main window: crawl controls, chapter list, text view and PDF view."""

    def __init__(self, root):
        self.root = root
        self.root.title("小说阅读器")
        self.root.geometry("1200x800")
        self.spider = NovelSpider()
        self.chapters = []       # (title, url, book_url) tuples
        self.current_chapter = 0
        self.content_cache = {}  # chapter index -> chapter text
        self.novel_name = ""
        self.create_widgets()

    # ------------------------------------------------------------------ UI

    def create_widgets(self):
        """Build the toolbar, chapter tree, text view, PDF view and status bar."""
        self.top_frame = ttk.Frame(self.root, padding="10")
        self.top_frame.pack(fill=tk.X, side=tk.TOP)

        ttk.Label(self.top_frame, text="网址:").pack(side=tk.LEFT, padx=5)
        self.url_entry = ttk.Entry(self.top_frame, width=50)
        self.url_entry.pack(side=tk.LEFT, padx=5)

        ttk.Label(self.top_frame, text="小说名称:").pack(side=tk.LEFT, padx=5)
        self.name_entry = ttk.Entry(self.top_frame, width=30)
        self.name_entry.pack(side=tk.LEFT, padx=5)

        self.crawl_btn = ttk.Button(self.top_frame, text="开始爬取", command=self.start_crawl)
        self.crawl_btn.pack(side=tk.LEFT, padx=5)
        self.search_btn = ttk.Button(self.top_frame, text="搜索小说", command=self.search_novel)
        self.search_btn.pack(side=tk.LEFT, padx=5)
        self.open_pdf_btn = ttk.Button(self.top_frame, text="打开PDF", command=self.open_pdf_file)
        self.open_pdf_btn.pack(side=tk.LEFT, padx=5)

        self.mode_var = tk.StringVar(value="text")
        ttk.Radiobutton(self.top_frame, text="文本阅读", variable=self.mode_var, value="text",
                        command=self.switch_mode).pack(side=tk.LEFT, padx=5)
        ttk.Radiobutton(self.top_frame, text="PDF阅读", variable=self.mode_var, value="pdf",
                        command=self.switch_mode).pack(side=tk.LEFT, padx=5)

        # Created here, packed only while a crawl is running (see start_crawl).
        self.progress_bar = ttk.Progressbar(self.top_frame, mode='determinate',
                                            maximum=100, value=0)

        self.main_frame = ttk.Frame(self.root)
        self.main_frame.pack(fill=tk.BOTH, expand=True)

        self.left_frame = ttk.Frame(self.main_frame, width=250, borderwidth=1, relief="solid")
        self.left_frame.pack(fill=tk.Y, side=tk.LEFT)
        self.left_frame.pack_propagate(False)
        self.chapter_tree = ttk.Treeview(self.left_frame, columns=('index',), show='tree')
        self.chapter_tree.pack(fill=tk.BOTH, expand=True)
        self.chapter_tree.bind('<<TreeviewSelect>>', self.on_chapter_select)
        self.chapter_tree.insert("", tk.END, text="请输入小说网址并爬取")
        print("章节树已初始化,显示提示文本")

        self.text_frame = ttk.Frame(self.main_frame)
        self.text_frame.pack(fill=tk.BOTH, expand=True)
        self.content_text = tk.Text(self.text_frame, wrap=tk.WORD, font=('SimSun', 12))
        self.content_text.pack(fill=tk.BOTH, expand=True)

        self.pdf_frame = PDFViewer(self.main_frame)

        self.bottom_frame = ttk.Frame(self.root, padding="10")
        self.bottom_frame.pack(fill=tk.X, side=tk.BOTTOM)
        self.status_label = ttk.Label(self.bottom_frame, text="准备就绪")
        self.status_label.pack(side=tk.RIGHT)

        self.current_pdf_path = ""
        self.stop_crawl_flag = False

    def _post_status(self, text):
        """Schedule a status-bar update on the Tk main loop."""
        self.root.after(0, lambda: self.status_label.config(text=text))

    def _post_progress(self, value):
        """Schedule a progress-bar update on the Tk main loop."""
        self.root.after(0, lambda: self.progress_bar.config(value=value))

    def sort_chapters(self, chapters):
        """Return *chapters* sorted by the number found in each title.

        Titles without a recognised number sort as 0; the sort is stable.
        """
        def extract_chapter_number(title):
            for pattern in _CHAPTER_NUMBER_PATTERNS:
                match = pattern.search(title)
                if match:
                    return int(match.group(1))
            return 0

        return sorted(chapters, key=lambda chapter: extract_chapter_number(chapter[0]))

    def disable_inputs(self):
        """Disable the entries and buttons that must not change mid-crawl."""
        for widget in (self.url_entry, self.name_entry, self.search_btn, self.open_pdf_btn):
            widget.config(state=tk.DISABLED)

    def enable_inputs(self):
        """Re-enable the widgets disabled by disable_inputs."""
        for widget in (self.url_entry, self.name_entry, self.search_btn, self.open_pdf_btn):
            widget.config(state=tk.NORMAL)

    # --------------------------------------------------------------- crawl

    def start_crawl(self):
        """Start a crawl, or request a stop when one is already running."""
        if self.crawl_btn['text'] == '停止爬取':
            self.stop_crawl_flag = True
            self.crawl_btn.config(text='开始爬取')
            self.status_label.config(text="爬取已停止")
            self.enable_inputs()
            return

        book_url = self.url_entry.get().strip()
        self.novel_name = self.name_entry.get().strip()
        if not book_url:
            messagebox.showwarning("警告", "请输入网址")
            return
        if not self.novel_name:
            messagebox.showwarning("警告", "请输入小说名称")
            return

        self.stop_crawl_flag = False
        self.content_cache = {}
        self.chapters = []
        self.disable_inputs()
        self.crawl_btn.config(text='停止爬取')
        self.progress_bar.pack(side=tk.LEFT, padx=5, fill=tk.X, expand=True)
        self.start_crawl_thread()

    def search_novel(self):
        """Search the site for the entered name and list the matches."""
        novel_name = self.name_entry.get().strip()
        if not novel_name:
            messagebox.showwarning("警告", "请输入小说名称")
            return
        self.status_label.config(text=f"正在搜索《{novel_name}》...")

        def search_thread():
            try:
                # `params` lets requests URL-encode the query value.
                response = self.spider.session.get(
                    f"{BASE_URL}/s/", params={'searchkey': novel_name}, timeout=10)
                response.encoding = "utf-8"
                soup = BeautifulSoup(response.text, 'html.parser')

                book_items = (soup.find_all('div', class_='bookitem')
                              or soup.find_all('div', class_='search-item'))
                if not book_items:
                    book_items = soup.find_all('a', href=True)

                results = []
                for item in book_items:
                    # A result container has no href of its own; use its link.
                    link = item if item.name == 'a' else item.find('a', href=True)
                    if link is None:
                        continue
                    title = link.get_text(strip=True)
                    href = link.get('href', '')
                    if title and href and '/read/' in href and novel_name in title:
                        if not href.startswith('http'):
                            href = BASE_URL + href
                        results.append((title, href))

                if not results:
                    self._post_status("搜索完成")
                    self.root.after(0, lambda: messagebox.showwarning(
                        "提示", f"未找到《{novel_name}》相关小说"))
                    return
                self.root.after(0, lambda: self.show_search_results(results))
            except Exception as e:
                print(f"搜索失败: {e}")
                # Format now: `e` is unbound once the except block is left.
                message = f"搜索失败: {str(e)}"
                self.root.after(0, lambda: messagebox.showerror("错误", message))

        threading.Thread(target=search_thread, daemon=True).start()

    def show_search_results(self, results):
        """Open a window listing ``(title, url)`` results; a pick fills the form."""
        search_window = tk.Toplevel(self.root)
        search_window.title("搜索结果")
        search_window.geometry("800x500")
        tree = ttk.Treeview(search_window, columns=('url',), show='tree')
        tree.pack(fill=tk.BOTH, expand=True)
        for title, url in results:
            tree.insert("", tk.END, text=title, values=(url,))

        def on_select(event):
            selected = tree.selection()
            if not selected:
                return
            item = selected[0]
            url = tree.item(item, "values")[0]
            title = tree.item(item, "text")
            self.url_entry.delete(0, tk.END)
            self.url_entry.insert(0, url)
            self.name_entry.delete(0, tk.END)
            self.name_entry.insert(0, title)
            search_window.destroy()

        # NOTE(review): restored as the Treeview selection virtual event, so a
        # single click picks the row and closes the window - confirm intent.
        tree.bind('<<TreeviewSelect>>', on_select)
        select_btn = ttk.Button(search_window, text="选择", command=lambda: on_select(None))
        select_btn.pack(pady=10)
        self.status_label.config(text="搜索完成")

    def start_crawl_thread(self):
        """Fetch and sort the chapter list in a worker, then start PDF export."""
        book_url = self.url_entry.get().strip()
        self.novel_name = self.name_entry.get().strip()

        def crawl_thread():
            # True once auto_generate_pdf has been scheduled; from then on the
            # PDF step is responsible for calling crawl_complete.
            handed_off = False
            try:
                self._post_progress(0)
                self._post_status("正在获取章节列表...")
                print("开始获取章节列表")
                self.chapters = self.spider.get_chapters(book_url)
                print(f"获取章节完成,共 {len(self.chapters)} 章")
                if self.stop_crawl_flag:
                    return
                if not self.chapters:
                    self.root.after(0, lambda: messagebox.showwarning("警告", "未能获取章节列表"))
                    return

                self._post_progress(5)
                self._post_status("正在排序章节...")
                self.chapters = self.sort_chapters(self.chapters)
                print(f"章节排序完成,共 {len(self.chapters)} 章")
                self._post_progress(8)

                print("准备更新章节树")
                self.root.after(0, self.update_chapter_tree)
                self._post_status("正在生成PDF...")
                self.root.after(0, self.auto_generate_pdf)
                handed_off = True
            except Exception as e:
                message = f"爬取失败: {str(e)}"
                self.root.after(0, lambda: messagebox.showerror("错误", message))
            finally:
                if not handed_off:
                    self.root.after(0, self.crawl_complete)

        threading.Thread(target=crawl_thread, daemon=True).start()

    def update_chapter_tree(self):
        """Fill the chapter tree from self.chapters and show the first chapter."""
        print(f"update_chapter_tree 被调用,章节数: {len(self.chapters)}")
        for item in self.chapter_tree.get_children():
            self.chapter_tree.delete(item)
        for i, (title, _, _) in enumerate(self.chapters):
            self.chapter_tree.insert("", tk.END, text=title, values=(i,))
        print(f"章节树已更新,共 {len(self.chapters)} 章")
        self.crawl_btn.config(state=tk.NORMAL)
        self.status_label.config(
            text=f"《{self.novel_name}》获取成功,共 {len(self.chapters)} 章")
        if self.chapters:
            self.chapter_tree.selection_set(self.chapter_tree.get_children()[0])
            self.show_chapter(0)

    # ----------------------------------------------------------------- PDF

    def auto_generate_pdf(self):
        """Download every chapter and write ``download/<novel name>.pdf``.

        Must be called on the Tk thread; the work runs in a daemon thread that
        calls crawl_complete when it ends.
        """
        try:
            import reportlab.platypus  # noqa: F401 - availability check only
        except ImportError as e:
            msg = f"需要安装reportlab才能生成PDF\n错误: {e}\n请运行: pip install reportlab"
            self.status_label.config(text="需要安装reportlab")
            messagebox.showwarning("提示", msg)
            self.crawl_complete()
            return

        self.status_label.config(text="正在多线程获取内容...")

        def pdf_thread():
            try:
                if not self._fetch_all_contents():
                    return
                font_name = self._register_pdf_font()
                if not self._build_pdf(font_name):
                    return

                self._post_progress(97)
                self._post_status("正在添加书签...")
                print("开始添加书签...")
                self.add_pdf_bookmarks_simple(self.current_pdf_path, self.chapters)
                print("书签添加完成")

                self._post_progress(100)
                self._post_status("PDF生成完成!文件已保存")
                pdf_path = self.current_pdf_path
                self.root.after(0, lambda: messagebox.showinfo(
                    "成功", f"PDF生成完成!\n文件位置: {pdf_path}"))
            except Exception as e:
                print(f"PDF生成失败: {e}")
                message = f"PDF生成失败: {str(e)}"
                self.root.after(0, lambda: messagebox.showerror("错误", message))
            finally:
                self.root.after(0, self.crawl_complete)

        threading.Thread(target=pdf_thread, daemon=True).start()

    def _fetch_all_contents(self):
        """Fill self.content_cache using a thread pool; False if stopped."""
        from concurrent.futures import ThreadPoolExecutor, as_completed
        import urllib3

        # The session disables certificate verification; silence its warnings.
        urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

        chapters = list(self.chapters)
        total = len(chapters)
        if total == 0:
            return not self.stop_crawl_flag  # an empty pool size is rejected
        max_workers = min(10, total)

        def fetch_content(idx, chapter):
            if self.stop_crawl_flag:
                return idx, None
            try:
                return idx, self.spider.get_content(chapter)
            except Exception as e:
                print(f"章节 {idx} 获取失败: {e}")
                return idx, None

        self._post_status(f"多线程获取内容... (0/{total})")
        print(f"开始多线程获取内容,共 {total} 章,使用 {max_workers} 个线程")
        with ThreadPoolExecutor(max_workers=max_workers) as executor:
            futures = []
            for i, chapter in enumerate(chapters):
                if self.stop_crawl_flag:
                    break
                futures.append(executor.submit(fetch_content, i, chapter))
                print(f"已提交任务 {i+1}/{total}")

            completed = 0
            for future in as_completed(futures):
                if self.stop_crawl_flag:
                    # Drop queued work; running tasks finish on their own.
                    for pending in futures:
                        pending.cancel()
                    return False
                idx, content = future.result()
                if content:
                    self.content_cache[idx] = content
                completed += 1
                progress = 10 + int(completed / total * 75)
                self._post_progress(progress)
                self._post_status(f"多线程获取内容... {progress}% ({completed}/{total})")
        return not self.stop_crawl_flag

    def _register_pdf_font(self):
        """Register a Chinese-capable font with ReportLab and return its name."""
        from reportlab.pdfbase import pdfmetrics
        from reportlab.pdfbase.ttfonts import TTFont

        font_dirs = ['C:\\Windows\\Fonts',
                     os.path.join(os.environ.get('WINDIR', 'C:\\Windows'), 'Fonts')]
        candidates = ['SimSun.ttf', 'simsun.ttc']
        for font_dir in font_dirs:
            candidates.append(os.path.join(font_dir, 'simsun.ttc'))
            candidates.append(os.path.join(font_dir, 'SimSun.ttf'))

        for path in candidates:
            if os.path.exists(path):
                pdfmetrics.registerFont(TTFont('SimSun', path))
                return 'SimSun'

        # No SimSun file: fall back to ReportLab's built-in (non-embedded) CID
        # font so the build does not fail on an unregistered font name.
        from reportlab.pdfbase.cidfonts import UnicodeCIDFont
        pdfmetrics.registerFont(UnicodeCIDFont('STSong-Light'))
        self.root.after(0, lambda: messagebox.showwarning(
            "警告", "未找到中文字体,可能影响PDF生成"))
        return 'STSong-Light'

    def _build_pdf(self, font_name):
        """Lay out all chapters and write the PDF; False if stopped."""
        from xml.sax.saxutils import escape
        from reportlab.lib import colors
        from reportlab.lib.pagesizes import A4
        from reportlab.lib.styles import ParagraphStyle
        from reportlab.lib.units import inch
        from reportlab.platypus import SimpleDocTemplate, Paragraph, PageBreak

        download_dir = os.path.join(os.getcwd(), "download")
        os.makedirs(download_dir, exist_ok=True)
        novel_name = self.novel_name
        pdf_path = os.path.join(download_dir, f"{_safe_filename(novel_name)}.pdf")
        self.current_pdf_path = pdf_path

        doc = SimpleDocTemplate(pdf_path, pagesize=A4, leftMargin=50, rightMargin=50,
                                topMargin=50, bottomMargin=50)
        title_style = ParagraphStyle("BookTitle", fontSize=20, alignment=1, spaceAfter=30,
                                     fontName=font_name)
        chap_style = ParagraphStyle("ChapTitle", fontSize=14, spaceBefore=20, spaceAfter=10,
                                    fontName=font_name, textColor=colors.darkblue)
        txt_style = ParagraphStyle("Content", fontSize=11, leading=18, spaceAfter=6,
                                   fontName=font_name)

        # Paragraph parses XML-like markup, so scraped text must be escaped.
        story = [Paragraph(escape(novel_name), title_style), PageBreak()]
        self._post_status("正在生成PDF...")

        chapters = list(self.chapters)
        for i, chapter_info in enumerate(chapters):
            if self.stop_crawl_flag:
                return False
            title = chapter_info[0]
            content = self.content_cache.get(i)
            if not content:
                # The parallel pass missed this chapter; fetch it now.
                content = self.spider.get_content(chapter_info)
                if content:
                    self.content_cache[i] = content

            story.append(Paragraph(escape(title), chap_style))
            if content:
                for line in content.split("\n"):
                    line = line.strip()
                    # Lines repeating the chapter title are dropped.
                    if line and title not in line:
                        story.append(Paragraph(escape(line), txt_style))
            story.append(PageBreak())

            progress = 85 + int((i + 1) / len(chapters) * 10)
            self._post_progress(progress)
            self._post_status(f"正在生成PDF... {progress}%")

        def on_first_page(canvas, doc):
            canvas.saveState()
            canvas.setFont(font_name, 9)
            canvas.drawString(inch, 0.75 * inch, f"《{novel_name}》")
            canvas.restoreState()

        def on_later_pages(canvas, doc):
            canvas.saveState()
            canvas.setFont(font_name, 9)
            canvas.drawString(inch, 0.75 * inch, f"第 {doc.page} 页")
            canvas.drawRightString(doc.pagesize[0] - inch, 0.75 * inch, f"《{novel_name}》")
            canvas.restoreState()

        print("开始构建PDF文档...")
        doc.build(story, onFirstPage=on_first_page, onLaterPages=on_later_pages)
        print("PDF文档构建完成")
        return True

    def add_pdf_bookmarks(self, pdf_path, toc_entries):
        """Write a flat outline into *pdf_path* using PyMuPDF.

        *toc_entries* is an iterable of ``(title, estimated_page)`` pairs. A
        title found in the page text is bookmarked there; otherwise
        ``estimated_page - 1`` (floored at 0) is used as the 0-based page.
        Errors are printed, not raised.
        """
        try:
            toc_entries = list(toc_entries)
            doc = fitz.open(pdf_path)
            temp_path = pdf_path + ".tmp"
            try:
                print(f"PDF共 {doc.page_count} 页,需要添加 {len(toc_entries)} 个书签")
                page_titles = {}
                for page_num in range(doc.page_count):
                    text = doc.load_page(page_num).get_text()
                    if page_num < 3:
                        print(f"第 {page_num+1} 页前500字符: {text[:500]}")
                    # At most one not-yet-located title is recorded per page.
                    for title, _ in toc_entries:
                        if title in text and title not in page_titles:
                            page_titles[title] = page_num
                            break
                print(f"找到 {len(page_titles)} 个章节标题")

                toc = []
                for title, page_num in toc_entries:
                    actual_page = page_titles.get(title, max(0, page_num - 1))
                    if actual_page < doc.page_count:
                        toc.append([1, title, actual_page + 1])  # set_toc pages are 1-based
                if toc:
                    doc.set_toc(toc)
                print(f"已添加 {len(toc)} 个书签")
                doc.save(temp_path)
            finally:
                doc.close()
            shutil.move(temp_path, pdf_path)
            print(f"书签添加成功")
        except Exception as e:
            print(f"添加书签失败: {e}")

    def add_pdf_bookmarks_simple(self, pdf_path, chapters):
        """Add one outline entry per chapter to *pdf_path* using pypdf.

        Each title is looked for in the 5 pages after the previous hit, then in
        the rest of the document. Errors are printed, not raised.
        """
        try:
            from pypdf import PdfReader, PdfWriter

            reader = PdfReader(pdf_path)
            writer = PdfWriter()
            for page in reader.pages:
                writer.add_page(page)
            total_pages = len(reader.pages)
            print(f"PDF共有 {total_pages} 页,开始添加书签...")

            page_text = {}  # page index -> extracted text, filled on demand

            def find_page(title, start, stop):
                for page_idx in range(start, stop):
                    if page_idx not in page_text:
                        page_text[page_idx] = reader.pages[page_idx].extract_text()
                    text = page_text[page_idx]
                    if text and title in text:
                        return page_idx
                return None

            last_found_page = 1  # page index 0 is the book title page
            for idx, (title, _, _) in enumerate(chapters):
                window_end = min(last_found_page + 5, total_pages)
                page_idx = find_page(title, last_found_page, window_end)
                note = ""
                if page_idx is None:
                    page_idx = find_page(title, window_end, total_pages)
                    note = "(跨页查找)"
                if page_idx is None:
                    continue
                try:
                    writer.add_outline_item(title, page_idx, parent=None)
                except Exception as e:
                    print(f"添加书签失败 '{title}': {e}")
                    continue
                print(f"为第{idx+1}章 '{title}' 添加书签到第{page_idx+1}页{note}")
                last_found_page = page_idx + 1

            output_path = pdf_path + "_with_bookmarks.pdf"
            with open(output_path, "wb") as f:
                writer.write(f)
            shutil.move(output_path, pdf_path)
            print("书签添加完成!")
        except Exception as e:
            print(f"添加书签失败: {e}")

    def find_chapter_page(self, doc, chapter_title, estimated_page):
        """Return the 0-based page of *doc* whose text contains *chapter_title*.

        Pages around *estimated_page* are checked first, then the whole
        document; with no match, ``estimated_page - 1`` floored at 0 is returned.
        """
        search_start = max(0, estimated_page - 2)
        search_end = min(doc.page_count, estimated_page + 2)
        for page_num in range(search_start, search_end):
            if chapter_title in doc.load_page(page_num).get_text():
                return page_num
        try:
            for page_num in range(doc.page_count):
                if chapter_title in doc.load_page(page_num).get_text():
                    return page_num
        except Exception as e:
            print(f"搜索章节页面失败: {e}")
        return estimated_page - 1 if estimated_page > 0 else 0

    def crawl_complete(self):
        """Hide the progress bar and restore the idle state of the toolbar."""
        self.progress_bar.stop()
        self.progress_bar.pack_forget()
        if self.crawl_btn['text'] == '停止爬取':
            self.crawl_btn.config(text='开始爬取')
        self.enable_inputs()

    # ------------------------------------------------------------- reading

    def on_chapter_select(self, event):
        """Show the chapter selected in the tree, in the active reading mode."""
        selected = self.chapter_tree.selection()
        if not selected:
            return
        values = self.chapter_tree.item(selected[0], "values")
        if not values:
            return  # the placeholder row carries no chapter index
        index = int(values[0])
        self.current_chapter = index
        if self.mode_var.get() == "text":
            self.show_chapter(index)
        else:
            self.show_pdf_chapter(index)

    def show_chapter(self, index):
        """Display chapter *index* as text, downloading it first if needed."""
        if index < 0 or index >= len(self.chapters):
            return
        self.current_chapter = index
        chapter_info = self.chapters[index]
        title = chapter_info[0]
        total = len(self.chapters)

        if index in self.content_cache:
            self.status_label.config(text=f"第 {index+1}/{total} 章:{title}")
            self.display_content(title, self.content_cache[index])
            return

        self.status_label.config(text=f"正在加载第 {index+1} 章:{title}")

        def show_loaded(content):
            # Skip when the user moved to another chapter in the meantime.
            if self.current_chapter != index:
                return
            self.display_content(title, content)
            self.status_label.config(text=f"第 {index+1}/{total} 章:{title}")

        def load_content():
            content = self.spider.get_content(chapter_info)
            if content:
                # Empty results are not cached, so a later visit retries.
                self.content_cache[index] = content
            self.root.after(0, lambda: show_loaded(content))

        threading.Thread(target=load_content, daemon=True).start()

    def show_pdf_chapter(self, index):
        """Show the first PDF page whose text contains the chapter title."""
        if index < 0 or index >= len(self.chapters):
            return
        self.current_chapter = index
        title = self.chapters[index][0]
        doc = self.pdf_frame.pdf_document
        if doc:
            for page_num in range(doc.page_count):
                if title in doc.load_page(page_num).get_text():
                    self.pdf_frame.show_page(page_num)
                    self.status_label.config(
                        text=f"第 {index+1}/{len(self.chapters)} 章:{title}")
                    return
        self.status_label.config(text=f"未找到章节 '{title}'")

    def display_content(self, title, content):
        """Replace the text view with *title* and *content*, read-only."""
        # A disabled Text widget ignores insert/delete; unlock it first.
        self.content_text.config(state=tk.NORMAL)
        self.content_text.delete("1.0", tk.END)
        self.content_text.insert(tk.END, f"{title}\n\n")
        self.content_text.insert(tk.END, content)
        self.content_text.config(state=tk.DISABLED)

    def _goto_chapter(self, index):
        """Select chapter *index* in the tree and show it in the active mode."""
        if index < 0 or index >= len(self.chapters):
            return
        self.current_chapter = index
        self.chapter_tree.selection_set(self.chapter_tree.get_children()[index])
        if self.mode_var.get() == "text":
            self.show_chapter(index)
        else:
            self.show_pdf_chapter(index)

    def prev_chapter(self):
        """Go to the previous chapter, if there is one."""
        if self.current_chapter > 0:
            self._goto_chapter(self.current_chapter - 1)

    def next_chapter(self):
        """Go to the next chapter, if there is one."""
        if self.current_chapter < len(self.chapters) - 1:
            self._goto_chapter(self.current_chapter + 1)

    def switch_mode(self):
        """Swap between the text view (with chapter list) and the PDF view."""
        if self.mode_var.get() == "text":
            self.pdf_frame.pack_forget()
            # The chapter list is packed first so it keeps the left edge.
            self.left_frame.pack(fill=tk.Y, side=tk.LEFT)
            self.text_frame.pack(fill=tk.BOTH, expand=True)
            if self.chapters:
                self.show_chapter(self.current_chapter)
        else:
            self.text_frame.pack_forget()
            self.left_frame.pack_forget()
            self.pdf_frame.pack(fill=tk.BOTH, expand=True)
            if self.current_pdf_path and os.path.exists(self.current_pdf_path):
                self.pdf_frame.load_pdf(self.current_pdf_path)

    def open_pdf_file(self):
        """Ask for a PDF file and open it in the PDF view."""
        file_path = filedialog.askopenfilename(filetypes=[("PDF文件", "*.pdf")])
        if file_path:
            self.current_pdf_path = file_path
            self.mode_var.set("pdf")
            self.switch_mode()


def main():
    """Create the main window and run the Tk event loop."""
    root = tk.Tk()
    app = NovelReaderApp(root)
    root.mainloop()


if __name__ == "__main__":
    main()