
Commit

fix
redpintings committed Jan 9, 2025
1 parent fb9ad20 commit 5674e96
Showing 6 changed files with 47 additions and 5 deletions.
3 changes: 1 addition & 2 deletions Backflows/runner.py
@@ -66,7 +66,6 @@ def is_project_context():
# print('spider_dir:', spider_dir)
else:
print('settings.py not found, unable to determine project context')

# If in a project context, add the current directory to sys.path
if is_project_context():
sys.path.insert(0, os.getcwd())
@@ -76,7 +75,7 @@ def is_project_context():
from downloadMiddleware import UserAgentMiddleware, RetryMiddleware, ProxyMiddleware
from pipeline import Pipeline

logger.info("Running in project context.")
# logger.info("Running in project context.")
except ImportError as e:
logger.error(f"Error importing project modules: {e}")
sys.exit(1) # Exit if essential project modules are missing
2 changes: 1 addition & 1 deletion backflow_core/downloadMiddleware.py
@@ -5,7 +5,7 @@

import random
from loguru import logger
from . import settings
import settings
from Backflows.middleware import DownloadMiddleware


2 changes: 1 addition & 1 deletion backflow_core/pipeline.py
@@ -7,7 +7,7 @@
import json
import pymongo
import asyncio
from .settings import MONGO_URI, MONGO_DATABASE, DATA_FILE_PATH
from settings import MONGO_URI, MONGO_DATABASE, DATA_FILE_PATH


class Pipeline:
3 changes: 3 additions & 0 deletions backflow_core/spiders/__init__.py
@@ -0,0 +1,3 @@
# -*- coding: utf-8 -*-
# @Author : ysl
# @File : __init__.py
40 changes: 40 additions & 0 deletions backflow_core/spiders/dongfangcaifu.py
@@ -0,0 +1,40 @@
# -*- coding: utf-8 -*-
# @Author : ysl
# @File : dongfangcaifu.py

from loguru import logger
from utils.tools import Tools
from Backflows.base import BackFlow
from Backflows.middleware import Request
from parsel import Selector
import traceback


class Dongfangcaifu(BackFlow):
    name = 'dongfangcaifu'

    def __init__(self):
        super().__init__()
        self.ck = None
        self.headers = {
            'Cookie': 'st_si=07577822841164; st_asi=delete; qgqp_b_id=29bf1bcc2d53a8587adedc785cc087ba; st_pvi=23828108194402; st_sp=2024-12-03%2014%3A12%3A08; st_inirUrl=https%3A%2F%2Fwww.google.com%2F; st_sn=162; st_psi=20250106105653308-113104302701-4707866973',
            'Referer': 'https://finance.eastmoney.com/a/ccjdd_2.html',
            'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36',
        }

    async def get_page_request(self, page):
        url = 'https://finance.eastmoney.com/a/ccjdd_{}.html'.format(page)
        yield Request('GET', url=url, headers=self.headers, cookies=self.ck, meta={'page': page})

    async def parse(self, response):
        resp = response.text
        resp = Selector(resp)
        nodes = resp.xpath("//div[@class='artitleList']//li//a")
        for node in nodes:
            title = node.xpath("./text()").get()
            url = node.xpath("./@href").get()
            news = {
                'url': url,
                'title': title,
            }
            yield news
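
A minimal sketch of how the new spider's parse() logic could be dry-run against static HTML, separate from the commit itself. It assumes backflow_core is importable from the working directory, that BackFlow() takes no required constructor arguments, and it stubs the response with an object exposing only a .text attribute, since that is all parse() reads; the sample HTML and its URL are hypothetical.

# Sketch: dry run of Dongfangcaifu.parse() against static HTML.
# Assumptions: backflow_core is on sys.path and BackFlow() needs no arguments.
import asyncio
from types import SimpleNamespace

from backflow_core.spiders.dongfangcaifu import Dongfangcaifu

SAMPLE_HTML = """
<div class="artitleList">
  <ul>
    <li><a href="https://finance.eastmoney.com/a/sample.html">Sample headline</a></li>
  </ul>
</div>
"""

async def main():
    spider = Dongfangcaifu()
    # parse() is an async generator yielding {'url': ..., 'title': ...} dicts.
    async for item in spider.parse(SimpleNamespace(text=SAMPLE_HTML)):
        print(item)

asyncio.run(main())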
2 changes: 1 addition & 1 deletion setup.py
@@ -28,7 +28,7 @@ def run(self):

setup(
    name='backflow',
    version='0.2.2', # Increment the version
    version='0.2.3', # Increment the version
    author='ysl',  # Replace with your name
    author_email='[email protected]',  # Replace with your email
    description='A simple crawler framework that implements both single run and distributed run based on Celery,'
