Skip to content
Permalink

Comparing changes

Choose two branches to see what’s changed or to start a new pull request. If you need to, you can also compare across forks or learn more about diff comparisons.

Open a pull request

Create a new pull request by comparing changes across two branches. If you need to, you can also compare across forks. Learn more about diff comparisons here.
base repository: starkwang/Zhihu-Spider
Failed to load repositories. Confirm that selected base ref is valid, then try again.
Loading
base: master
Choose a base ref
...
head repository: level-up-team/Zhihu-Spider
Failed to load repositories. Confirm that selected head ref is valid, then try again.
Loading
compare: master
Choose a head ref
Checking mergeability… Don’t worry, you can still create the pull request.
  • 1 commit
  • 9 files changed
  • 1 contributor

Commits on Mar 24, 2016

  1. ES5 to ES6

    q545244819 committed Mar 24, 2016
    Copy the full SHA
    a83c644 View commit details
Showing with 295 additions and 238 deletions.
  1. +2 −1 .gitignore
  2. +1 −1 client/index.html
  3. +1 −1 client/src/index.js
  4. +23 −0 gulpfile.js
  5. +20 −21 index.js
  6. +10 −2 package.json
  7. +107 −90 src/Spider.js
  8. +86 −84 src/fetchFollwerOrFollwee.js
  9. +45 −38 src/getUser.js
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
node_modules/
dist/

# IDE
.idea

# config
config.js
config.js
2 changes: 1 addition & 1 deletion client/index.html
Original file line number Diff line number Diff line change
@@ -12,6 +12,6 @@
<div id="main" style="width: 1000px;height:800px;"></div>
</body>

<script src="/js/bundle.js"></script>
<script src="/build/bundle.js"></script>

</html>
2 changes: 1 addition & 1 deletion client/src/index.js
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
var echarts = require('echarts');
var socket = require('socket.io-client')('http://localhost:3001');
var socket = require('socket.io-client')('http://localhost:8080');
var $ = require('jquery');
var myChart = echarts.init(document.getElementById('main'));
var echartParser = require('./echartParser');
23 changes: 23 additions & 0 deletions gulpfile.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
"use strict"

const gulp = require('gulp')
const babel = require('gulp-babel')
const sourcemaps = require('gulp-sourcemaps')
const plumber = require('gulp-plumber')

// Glob shared by the build and watch tasks: every .js file directly under src/.
const SOURCE_GLOB = 'src/*.js'

/**
 * Build step: run the sources through Babel with the `es2015` preset and
 * write the output, together with external source maps, into `dist`.
 *
 * plumber() is the first stage of the pipeline, ahead of the Babel plugin.
 *
 * @returns {object} the stream produced by the final `.pipe(gulp.dest(...))`,
 *   returned so gulp can track the task.
 */
const transpile = () => {
  const babelOptions = {
    presets: ['es2015']
  }

  return gulp.src(SOURCE_GLOB)
    .pipe(plumber())
    .pipe(sourcemaps.init())
    .pipe(babel(babelOptions))
    .pipe(sourcemaps.write('.'))
    .pipe(gulp.dest('dist'))
}

/**
 * Watch step: re-run the `babel` task whenever a file matching the source
 * glob changes.
 */
const watchSources = () => {
  gulp.watch(SOURCE_GLOB, ['babel'])
}

gulp.task('babel', transpile)

gulp.task('watch', watchSources)

// Running plain `gulp` starts the watcher.
gulp.task('default', ['watch'])
41 changes: 20 additions & 21 deletions index.js
Original file line number Diff line number Diff line change
@@ -1,26 +1,25 @@
var Spider = require('./src/Spider');
var express = require('express');
var bodyParser = require('body-parser');
const express = require('express')
const bodyParser = require('body-parser')
const http = require('http')
const Spider = require('./dist/Spider')

var app = express();
var server = require('http').createServer(app);
var io = require('socket.io')(server);
io.on('connection', function(socket) {
socket.on('fetch start', function(data) {
Spider(data.url, socket);
});
});
server.listen(3001);
const app = express()
const server = http.createServer(app)
const io = require('socket.io')(server)

app.use(bodyParser())
app.use(express.static('./client'))

app.use(bodyParser());// WARNING
app.use('/js', express.static('./client/build'));
app.use('/css', express.static('./client/build'));
app.get('/', (req, res) => {
res.sendFile(__dirname + '/client/index.html')
})

app.get('/', function(req, res) {
res.sendFile(__dirname + '/client/index.html');
});
io.on('connection', socket => {
socket.on('fetch start', data => {
Spider(data.url, socket)
})
})

app.listen(3000,function(){
console.log('server start at 127.0.0.1:%s',this.address().port)
});
server.listen(3001)

app.listen(8080)
12 changes: 10 additions & 2 deletions package.json
Original file line number Diff line number Diff line change
@@ -9,10 +9,18 @@
"express": "^4.13.4",
"gexf": "^0.2.5",
"request": "^2.69.0",
"socket.io": "^1.4.5"
"socket.io": "^1.4.5",
"tracer": "^0.8.3"
},
"devDependencies": {
"babel-preset-es2015": "^6.5.0",
"gulp": "^3.9.1",
"gulp-babel": "^6.1.2",
"gulp-plumber": "^1.1.0",
"gulp-sourcemaps": "^1.6.0"
},
"devDependencies": {},
"scripts": {
"start": "node app",
"test": "echo \"Error: no test specified\" && exit 1"
},
"author": "",
197 changes: 107 additions & 90 deletions src/Spider.js
Original file line number Diff line number Diff line change
@@ -1,96 +1,113 @@
var fetchFollwerOrFollwee = require('./fetchFollwerOrFollwee');
var getUser = require('./getUser');
var Promise = require('bluebird');
var config = require('../config');
module.exports = Spider;

function Spider(userPageUrl, socket) {
socket.emit('notice', '抓取用户信息......');
return getUser(userPageUrl)
.then(function(user) {
socket.emit('notice', '抓取用户信息成功');
socket.emit('get user', user);
return getFriends(user, socket);
})
.then(function(myFriends) {
return Promise.map(myFriends, function(myFriend) {
return getUser(myFriend.url);
}, { concurrency: config.concurrency ? config.concurrency : 3 });
})
.then(function(myFriends) {
var input = [];
myFriends.forEach(function(friend) {
input.push({
"user": friend,
"sameFriends": []
})
});
socket.emit('data', input);

console.log(myFriends);
return Promise.map(myFriends, function(myFriend) {
return searchSameFriend(myFriend, myFriends, socket);
}, { concurrency: config.concurrency ? config.concurrency : 3 });
})
.then(function(result) {
var data = result;
socket.emit('data', data);

})
.catch(function(err) {
console.log(err);
})
"use strict"

import Promise from 'bluebird'
import tracer from 'tracer'
import fetchFollwerOrFollwee from './fetchFollwerOrFollwee'
import getUser from './getUser'
import config from '../config'

const logger = tracer.colorConsole()

/**
 * Crawl entry point: load the user behind `userPageUrl`, load that user's
 * friends, then compute for every friend the friends they share with the
 * root user. Progress and results are pushed to the client through `socket`.
 *
 * Events emitted, in order:
 *   - 'notice'   : progress text before and after the root user is fetched
 *   - 'get user' : the root user object resolved by getUser
 *   - 'data'     : one `{user, sameFriends: []}` placeholder per friend
 *   - 'data'     : the final per-friend results from searchSameFriend
 *
 * @param {string} userPageUrl - value handed to getUser for the root user.
 * @param {object} socket - object exposing `emit(event, payload)`; it is also
 *   forwarded to getFriends and searchSameFriend.
 * @returns {Promise<undefined>} settles once the pipeline has finished. Errors
 *   raised anywhere in the chain are passed to `logger.error` and are not
 *   re-thrown.
 */
const Spider = (userPageUrl, socket) => {
  // Fall back to 3 parallel requests when the config gives no (truthy) value.
  const limit = config.concurrency || 3

  // Tell the client the root user is available, then look up their friends.
  const announceRootUser = rootUser => {
    socket.emit('notice', '抓取用户信息成功')
    socket.emit('get user', rootUser)

    return getFriends(rootUser, socket)
  }

  // Resolve each friend entry to a full user object via its `url`.
  const loadFriendProfiles = friendEntries => {
    const fetchProfile = entry => getUser(entry.url)

    return Promise.map(friendEntries, fetchProfile, {concurrency: limit})
  }

  // Send the empty placeholders first so the client has every friend
  // before the shared-friend lists are computed.
  const compareAllFriends = profiles => {
    const placeholders = profiles.map(profile => ({
      user: profile,
      sameFriends: [],
    }))

    socket.emit('data', placeholders)

    // debug
    logger.log(profiles)

    const compareOne = profile => searchSameFriend(profile, profiles, socket)

    return Promise.map(profiles, compareOne, {concurrency: limit})
  }

  const publishResults = results => {
    socket.emit('data', results)
  }

  const reportFailure = err => {
    // debug
    logger.error(err)
  }

  socket.emit('notice', '抓取用户信息......')

  return getUser(userPageUrl)
    .then(announceRootUser)
    .then(loadFriendProfiles)
    .then(compareAllFriends)
    .then(publishResults)
    .catch(reportFailure)
}
/**
 * Resolve the "friends" of `user`: the followers whose `hash_id` also appears
 * in the user's followee list.
 *
 * Both lists are requested up front through fetchFollwerOrFollwee and awaited
 * together. The intersection uses a Set of followee ids, so it takes one pass
 * over each list instead of comparing every follower with every followee.
 * Each follower is returned at most once, even if the followee list repeats
 * a `hash_id`.
 *
 * @param {object} user - user object forwarded unchanged to
 *   fetchFollwerOrFollwee (its shape is defined by that helper).
 * @param {object} socket - forwarded unchanged to fetchFollwerOrFollwee.
 * @returns {Promise<object[]>} follower entries, in follower-list order, that
 *   are also followees of `user`.
 */
const getFriends = (user, socket) => {
  const works = [
    // `isFollowees: true` selects the followee list; omitting it selects followers.
    fetchFollwerOrFollwee({isFollowees: true, user}, socket),
    fetchFollwerOrFollwee({user}, socket),
  ]

  return Promise.all(works)
    .then(([followees, followers]) => {
      const followeeIds = new Set(followees.map(followee => followee.hash_id))

      return followers.filter(follower => followeeIds.has(follower.hash_id))
    })
}
const searchSameFriend = (aFriend, myFriends, socket) => {
socket.emit("notice", "searchSameFriend with " + aFriend.name + "......")

// debug
logger.log("searchSameFriend with " + aFriend.name + "......")

return getFriends(aFriend, socket)
.then(function(targetFriends) {
let sameFriends = []

// debug
logger.log('counting for ' + aFriend.name + '......')
logger.log("\n\n==============\n Same Friends with " + aFriend.name + "\n")

targetFriends.forEach(targetFriend => {
myFriends.forEach(myFriend => {
if (targetFriend.hash_id === myFriend.hash_id) {
sameFriends.push(targetFriend)
}
})
})

socket.emit('same friend', {
hash_id: aFriend.hash_id,
sameFriends: sameFriends
})

// debug
logger.log(sameFriends)
logger.log("\n\n")

function getFriends(user, socket) {
var works = [fetchFollwerOrFollwee({
isFollowees: true,
user: user
}, socket), fetchFollwerOrFollwee({
user: user
}, socket)];
return Promise.all(works).then(function(result) {
var followees = result[0];
var followers = result[1];
var friends = [];
followers.forEach(function(follower) {
followees.forEach(function(followee) {
if (follower.hash_id === followee.hash_id) {
friends.push(follower);
}
});
});
return friends;
});
return {
user: aFriend,
sameFriends,
}
})
}

/**
 * Work out which of `aFriend`'s friends also appear in `myFriends`, report
 * them to the client, and return them.
 *
 * A target friend is added once for every entry of `myFriends` with the same
 * `hash_id`, so repeated entries in `myFriends` produce repeated results.
 *
 * @param {object} aFriend - the friend being examined; `name` and `hash_id` are read.
 * @param {object[]} myFriends - the root user's friends; `hash_id` is read on each.
 * @param {object} socket - object exposing `emit(event, payload)`; also
 *   forwarded to getFriends.
 * @returns {Promise<{user: object, sameFriends: object[]}>} `aFriend` paired
 *   with the shared friends. The same array is sent in the 'same friend' event.
 */
function searchSameFriend(aFriend, myFriends, socket) {
    var startMessage = "searchSameFriend with " + aFriend.name + "......";

    socket.emit("notice", startMessage);
    console.log(startMessage);

    // Runs once aFriend's own friend list has been resolved.
    function collectShared(targetFriends) {
        console.log('counting for ' + aFriend.name + '......');

        var shared = targetFriends.reduce(function(matches, candidate) {
            myFriends.forEach(function(known) {
                if (candidate.hash_id === known.hash_id) {
                    matches.push(candidate);
                }
            });
            return matches;
        }, []);

        console.log("\n\n==============\n Same Friends with " + aFriend.name + "\n");

        var report = {
            hash_id: aFriend.hash_id,
            sameFriends: shared
        };
        socket.emit('same friend', report);

        console.log(shared);
        console.log("\n\n");

        return {
            user: aFriend,
            sameFriends: shared
        };
    }

    return getFriends(aFriend, socket).then(collectShared);
}
module.exports = Spider
Loading