// 批量下载图灵已购图书电子版 (batch-download the e-book editions of books purchased on ituring.com.cn)

const puppeteer = require('puppeteer');

// Login page and the CSS selectors of the form controls filled in by the script.
const LOGIN_URL = "http://account.ituring.com.cn/log-in";
const USERNAME_SELECTOR = '#Email';
const PASSWORD_SELECTOR = '#Password';
const LOGIN_BUTTON_SELECTOR = '#loginForm > form > div:nth-child(6) > div > input';

// Bookshelf page, the selector of the per-book links on it, and the selector
// of the download link on an individual book page (the script reads `href`
// from the elements matched by the last two).
const BOOKS_URL = 'http://www.ituring.com.cn/user/shelf';
const BOOK_SELECTOR = 'body > div.container.page-box.my-profile > div > div.col-md-9.main > div:nth-child(2) > ul > li > div.book-img > a';
const DOWNLOAD_SELECTOR = 'body > div.container > div > div.col-md-3.pull-right.side > ul > li:nth-child(1) > ul > li:nth-child(2) > a';

// Account credentials. The ITURING_USERNAME / ITURING_PASSWORD environment
// variables take precedence so that real credentials do not have to be written
// into this file; the original placeholder values remain as the fallback.
const username = process.env.ITURING_USERNAME || '图灵账号';
const password = process.env.ITURING_PASSWORD || '图灵密码';


/**
 * Launches a browser through puppeteer.
 *
 * The launch options always disable the sandbox and ignore HTTPS errors.
 * When NODE_ENV is "production" the browser is headless; otherwise it is
 * headed with devtools enabled.
 *
 * @returns {Promise<{browser: object}>} the value resolved by `puppeteer.launch`, wrapped in an object.
 */
async function createBrowser() {
  const isProduction = process.env.NODE_ENV === "production";

  const modeOptions = isProduction
    ? { headless: true }
    : { headless: false, devtools: true };

  // The sandbox has to be switched off when running with root privileges.
  // '--incognito' is not implemented.
  const launchOptions = Object.assign(
    {
      args: ['--no-sandbox', '--disable-setuid-sandbox'],
      ignoreHTTPSErrors: true
    },
    modeOptions
  );

  const browser = await puppeteer.launch(launchOptions);
  return { browser };
}

/**
 * Opens an incognito browser context and a new page inside it.
 *
 * The page gets a 120-second default navigation timeout. Outside of
 * production (NODE_ENV !== "production") the viewport is set to 1366x768.
 *
 * @param {object} [options]
 * @param {object} [options.browser] - browser to create the context on. When
 *   it is missing or falsy, a `__browser` global is used if one is defined.
 * @returns {Promise<{context: object, page: object}>}
 * @throws {TypeError} when no browser is passed and no `__browser` global exists.
 */
async function createContextAndPage({ browser } = {}) {
  // `__browser` is not declared in the visible part of this file. Probing it
  // with `typeof` keeps the legacy fallback working if another script defines
  // it, without raising a ReferenceError when nothing does.
  const fallbackBrowser = typeof __browser !== 'undefined' ? __browser : undefined;
  const activeBrowser = browser || fallbackBrowser;
  if (!activeBrowser) {
    throw new TypeError('createContextAndPage: a puppeteer browser is required');
  }

  const context = await activeBrowser.createIncognitoBrowserContext();
  const page = await context.newPage();
  await page.setDefaultNavigationTimeout(120 * 1000);

  if (process.env.NODE_ENV !== "production") {
    await page.setViewport({
      width: 1366,
      height: 768
    });
  }

  return { context, page };
}

/** Static helper functions used by the script. */
class Utils {
  /**
   * Pauses for a given time.
   *
   * @param {number} delay - time to wait, in milliseconds.
   * @returns {Promise<number>} a promise fulfilled with 1 once the timer fires.
   */
  static timeout(delay) {
    return new Promise((resolve) => {
      // The third argument of setTimeout is handed to the callback,
      // so the promise is fulfilled with the value 1.
      setTimeout(resolve, delay, 1);
    });
  }
}


// Entry point: logs in, reads the book links from the shelf page, then opens
// every book page and navigates to its download link. One record per book
// ({ title, bookUrl, error, downloadUrl }) is collected and printed at the end.
;(async () => {
  // Declared before the try block so that the finally block always has an
  // array to print, even when start-up or login fails.
  const result = [];

  try {
    const { browser } = await createBrowser();
    const { page } = await createContextAndPage({ browser });

    // Log in.
    await page.goto(LOGIN_URL);
    await page.type(USERNAME_SELECTOR, username);
    await page.type(PASSWORD_SELECTOR, password);
    // The click and the navigation wait are started together and awaited as a pair.
    await Promise.all([
      page.click(LOGIN_BUTTON_SELECTOR),
      page.waitForNavigation({
        timeout: 10000
      })
    ]);

    // Go to the book list page.
    await page.goto(BOOKS_URL);

    // Collect the URL of every book on the shelf.
    const bookUrls = await page.$$eval(BOOK_SELECTOR, (anchors) => {
      return anchors.map((anchor) => anchor.href);
    });

    for (const bookUrl of bookUrls) {
      console.log(bookUrl, "\n");
      await page.goto(bookUrl);
      // Fixed pause after opening the book page — presumably to let it finish rendering.
      await Utils.timeout(3000);
      const title = await page.title();

      try {
        const downloadUrl = await page.$eval(DOWNLOAD_SELECTOR, (item) => item.href);
        console.log("downloadUrl: ", downloadUrl, "\n");
        await page.goto(downloadUrl);
        // Fixed pause after requesting the download — presumably to give it time to start.
        await Utils.timeout(10000);
        result.push({
          title,
          bookUrl,
          error: '',
          downloadUrl
        });
      } catch (err) {
        // A failure on one book is recorded and the loop moves on to the next.
        console.log("error: ", err);
        result.push({
          title,
          bookUrl,
          error: err.message,
          downloadUrl: ''
        });
      }
    }
  } catch (err) {
    // Failures outside the per-book handler (browser start-up, login, shelf or
    // book page navigation) abort the run; report them instead of hiding them.
    console.error("fatal error: ", err);
    process.exitCode = 1;
  } finally {
    // NOTE(review): the browser is left open, as in the original where the
    // `browser.close()` call was commented out — presumably so that pending
    // downloads are not cut off. Confirm before closing it here.
    console.log(result);
  }
})();