1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137
| const puppeteer = require('puppeteer');
// Login page and the selectors of the form controls used to sign in.
const LOGIN_URL = "http://account.ituring.com.cn/log-in";
const USERNAME_SELECTOR = '#Email';
const PASSWORD_SELECTOR = '#Password';
const LOGIN_BUTTON_SELECTOR = '#loginForm > form > div:nth-child(6) > div > input';

// Bookshelf page, the selector matching each book link on it, and the
// selector of the download link on an individual book page.
const BOOKS_URL = 'http://www.ituring.com.cn/user/shelf';
const BOOK_SELECTOR = 'body > div.container.page-box.my-profile > div > div.col-md-9.main > div:nth-child(2) > ul > li > div.book-img > a';
const DOWNLOAD_SELECTOR = 'body > div.container > div > div.col-md-3.pull-right.side > ul > li:nth-child(1) > ul > li:nth-child(2) > a';

// Credentials typed into the login form. They can be supplied through the
// ITURING_USERNAME / ITURING_PASSWORD environment variables so real secrets
// need not be committed; when unset, the original placeholder literals are used.
const username = process.env.ITURING_USERNAME || '图灵账号';
const password = process.env.ITURING_PASSWORD || '图灵密码';
/**
 * Launches a Puppeteer browser.
 *
 * When NODE_ENV is "production" the browser runs headless; otherwise it runs
 * with a visible window and DevTools open.
 *
 * @returns {Promise<{browser: object}>} An object holding the launched browser.
 */
async function createBrowser() {
  // The sandbox has to be disabled when running as root.
  // '--incognito' is not passed here (noted as not implemented).
  const baseOptions = {
    args: ['--no-sandbox', '--disable-setuid-sandbox'],
    ignoreHTTPSErrors: true,
  };

  const modeOptions = process.env.NODE_ENV === "production"
    ? { headless: true }
    : { headless: false, devtools: true };

  const chromeOptions = Object.assign({}, baseOptions, modeOptions);
  const browser = await puppeteer.launch(chromeOptions);
  return { browser };
}
/**
 * Opens an isolated browser context and a page inside it.
 *
 * The page gets a 120-second default navigation timeout. Outside of
 * production (NODE_ENV !== "production") the viewport is set to 1366x768.
 *
 * @param {object} [options]
 * @param {object} [options.browser] Browser to create the context in. When
 *   omitted, a global `__browser` is used if one has been defined.
 * @returns {Promise<{context: object, page: object}>} The new context and page.
 * @throws {TypeError} If no browser was passed and no global `__browser` exists.
 */
async function createContextAndPage({ browser } = {}) {
  // `typeof` keeps the legacy global fallback from raising a ReferenceError
  // when `__browser` was never declared.
  const activeBrowser = browser || (typeof __browser !== 'undefined' ? __browser : undefined);
  if (!activeBrowser) {
    throw new TypeError('createContextAndPage requires a browser instance');
  }

  // Prefer the method the script was written against; fall back to
  // createBrowserContext on Puppeteer versions that no longer provide it.
  const context = typeof activeBrowser.createIncognitoBrowserContext === 'function'
    ? await activeBrowser.createIncognitoBrowserContext()
    : await activeBrowser.createBrowserContext();

  const page = await context.newPage();
  await page.setDefaultNavigationTimeout(120 * 1000);

  if (process.env.NODE_ENV !== "production") {
    await page.setViewport({
      width: 1366,
      height: 768,
    });
  }

  return { context, page };
}
/** Small helpers shared by the scraping steps. */
class Utils {
  /**
   * Waits for the given number of milliseconds.
   *
   * @param {number} delay Time to wait, in milliseconds.
   * @returns {Promise<number>} Resolves with 1 once the delay has elapsed.
   */
  static timeout(delay) {
    // Resolving cannot throw, so no rejection path is needed.
    return new Promise((resolve) => {
      setTimeout(() => resolve(1), delay);
    });
  }
}
/**
 * Signs in through the login form using the configured credentials.
 *
 * @param {object} page Puppeteer page to drive.
 * @returns {Promise<void>}
 */
async function logIn(page) {
  await page.goto(LOGIN_URL);
  await page.type(USERNAME_SELECTOR, username);
  await page.type(PASSWORD_SELECTOR, password);
  // Wait for the navigation alongside the click so the navigation caused by
  // submitting the form is not missed.
  await Promise.all([
    page.click(LOGIN_BUTTON_SELECTOR),
    page.waitForNavigation({ timeout: 10000 }),
  ]);
}

/**
 * Opens the bookshelf page and collects the link of every book on it.
 *
 * @param {object} page Puppeteer page to drive.
 * @returns {Promise<string[]>} The `href` of each element matching BOOK_SELECTOR.
 */
async function listBookUrls(page) {
  await page.goto(BOOKS_URL);
  return page.$$eval(BOOK_SELECTOR, (links) => links.map((link) => link.href));
}

/**
 * Visits one book page and navigates to its download link.
 *
 * A failure to find or open the download link is recorded in the returned
 * entry instead of being thrown; a failure to open the book page itself
 * still propagates to the caller.
 *
 * @param {object} page Puppeteer page to drive.
 * @param {string} bookUrl Address of the book page.
 * @returns {Promise<{title: string, bookUrl: string, error: string, downloadUrl: string}>}
 */
async function downloadBook(page, bookUrl) {
  console.log(bookUrl, "\n");
  await page.goto(bookUrl);
  await Utils.timeout(3000);
  const title = await page.title();

  try {
    const downloadUrl = await page.$eval(DOWNLOAD_SELECTOR, (item) => item.href);
    console.log("downloadUrl: ", downloadUrl, "\n");
    await page.goto(downloadUrl);
    // Pause after navigating to the download link before moving on.
    await Utils.timeout(10000);
    return { title, bookUrl, error: '', downloadUrl };
  } catch (err) {
    console.log("error: ", err);
    return { title, bookUrl, error: err.message, downloadUrl: '' };
  }
}

// Entry point: log in, list the bookshelf, then visit each book's download
// link in turn and print a summary of what happened for every book.
;(async () => {
  const { browser } = await createBrowser();
  // Declared before the try block so the summary is always an array, even
  // when a step fails early.
  const result = [];

  try {
    const { page } = await createContextAndPage({ browser });

    await logIn(page);
    const bookUrls = await listBookUrls(page);

    for (const bookUrl of bookUrls) {
      result.push(await downloadBook(page, bookUrl));
    }
  } catch (err) {
    // Report the failure instead of discarding it.
    console.error("error: ", err);
    process.exitCode = 1;
  } finally {
    console.log(result);
    // A headless production run has nothing left to inspect, so release the
    // browser and let the process exit; in development the window stays open.
    if (process.env.NODE_ENV === "production") {
      await browser.close();
    }
  }
})();
|