1. 背景
【笑小枫】https://www.xiaoxiaofeng.com 上线啦，资源持续整合中，程序员必备网站，快点前往围观吧
import { createApp } from "vue";
import { createMetaManager} from 'vue-meta'
// Build the vue-meta manager first. The first argument `false` is presumably
// the SSR flag — TODO confirm against the vue-meta documentation.
const metaManager = createMetaManager(false, {
  meta: { tag: 'meta', nameless: true }
});

// Create the root application, register the manager as a plugin, then mount
// the application on the #app element.
const app = createApp(App);
app.use(metaManager);
app.mount("#app");
- 在 `App.vue` 中添加 `<metainfo>` 标签，**一定要添加，不然不生效哦**
3. 在组件中使用 Vue-meta
- 安装并引入 Vue-meta 后，可以在 Vue 组件中使用它。要在组件中添加 meta 信息，如标题、描述等，可以在组件内引用，在 `onMounted` 中设定相关的信息：
<script setup>
import { onMounted } from "vue";
import { useMeta } from 'vue-meta';

// Register the page title and the keywords/description meta tags once the
// component has been mounted.
onMounted(() => {
  useMeta({
    title: '笑小枫💕 - 程序员的世外桃源',
    meta: [
      { name: 'keywords', content: '笑小枫,java,SpringBoot,程序员' },
      { name: 'description', content: '欢迎来到笑小枫，我们致力于打造一个开放、友好的技术社区，让知识和智慧在这里自由碰撞、绽放。欢迎加入我们的旅程，一起在技术的海洋中探索无限可能！' }
    ]
  });
});
</script>
- 上述代码中，我们设定了页面的 title（标题）为 “笑小枫💕 - 程序员的世外桃源”，并添加了两个 meta 标签：keywords（关键词）和 description（描述）。组件渲染时，Vue-meta 将自动更新这些 meta 信息。
3.3 Vue-meta 的高级用法
- Vue-meta 不仅可以设置 meta，还支持设置其他 HTML 标签，如 link、style、script 等。下面是一个为页面添加样式和脚本的例子：
<script setup>
import { onMounted } from "vue";
import { useMeta } from 'vue-meta';

// Register title and meta tags plus an external stylesheet and script once
// the component has been mounted.
onMounted(() => {
  useMeta({
    title: '笑小枫💕 - 程序员的世外桃源',
    meta: [
      { name: 'keywords', content: '笑小枫,java,SpringBoot,程序员' },
      { name: 'description', content: '欢迎来到笑小枫，我们致力于打造一个开放、友好的技术社区，让知识和智慧在这里自由碰撞、绽放。欢迎加入我们的旅程，一起在技术的海洋中探索无限可能！' }
    ],
    // Extra <link> entries: here a stylesheet loaded from a CDN.
    link: [
      {
        rel: 'stylesheet',
        href: 'https://cdnjs.cloudflare.com/ajax/libs/normalize/8.0.1/normalize.min.css'
      }
    ],
    // Extra <script> entries. `body: true` presumably asks vue-meta to place
    // the tag in <body> — TODO confirm against the vue-meta documentation.
    script: [
      {
        src: 'https://cdnjs.cloudflare.com/ajax/libs/jquery/3.6.0/jquery.min.js',
        async: true,
        body: true,
      }
    ]
  });
});
</script>
<script setup>
import { onMounted } from "vue";
import { useMeta } from 'vue-meta';

// Register title, meta tags, stylesheet and script once the component has
// been mounted.
onMounted(() => {
  useMeta({
    title: '笑小枫💕 - 程序员的世外桃源',
    meta: [
      { name: 'keywords', content: '笑小枫,java,SpringBoot,程序员' },
      { name: 'description', content: '欢迎来到笑小枫，我们致力于打造一个开放、友好的技术社区，让知识和智慧在这里自由碰撞、绽放。欢迎加入我们的旅程，一起在技术的海洋中探索无限可能！' }
    ],
    // Extra <link> entries: here a stylesheet loaded from a CDN.
    link: [
      {
        rel: 'stylesheet',
        href: 'https://cdnjs.cloudflare.com/ajax/libs/normalize/8.0.1/normalize.min.css'
      }
    ],
    // Extra <script> entries. `body: true` presumably asks vue-meta to place
    // the tag in <body> — TODO confirm against the vue-meta documentation.
    script: [
      {
        src: 'https://cdnjs.cloudflare.com/ajax/libs/jquery/3.6.0/jquery.min.js',
        async: true,
        body: true,
      }
    ]
  });
});
</script>
放在 `onMounted` 中时，因为数据请求也是放在 `onMounted` 中，感觉数据还没有请求完，meta 就渲染完了，导致不生效，暂时不知道怎么解决。误打误撞中，动态的页面会触发 `onUpdated` 事件，于是就取巧放在了 `onUpdated` 中，希望知道原因的前端大佬可以给予指导🙏🙏🙏
省略了非关键性代码，正常不会触发 `onUpdated`，这里误打误撞了…
wget https://nodejs.org/dist/v17.9.0/node-v17.9.0-linux-x64.tar.gz
- 解压安装包
tar -xzvf node-v17.9.0-linux-x64.tar.gz
- 设置软连接，建立快捷命令
ln -s /opt/node-v17.9.0-linux-x64/bin/node /usr/local/bin/
ln -s /opt/node-v17.9.0-linux-x64/bin/npm /usr/local/bin/
- 使用
node -v
查看是否安装成功
npm install puppeteer --save
npm install express
npm install html-minifier
注意：因为 puppeteer 最新版本可能需要的 node 版本不同，可以通过 package.json 指定 puppeteer 版本，本文使用的版本：
{
"dependencies": {
"express": "^4.18.2",
"html-minifier": "^4.0.0",
"puppeteer": "19.8.0"
}
}
- 安装依赖库
# Shared libraries headless Chrome needs on CentOS; the original line was cut off in the middle of "libXext.x86_64".
yum install pango.x86_64 libXcomposite.x86_64 libXcursor.x86_64 libXdamage.x86_64 libXext.x86_64 libXi.x86_64 libXtst.x86_64 cups-libs.x86_64 libXScrnSaver.x86_64 libXrandr.x86_64 GConf2.x86_64 alsa-lib.x86_64 atk.x86_64 gtk3.x86_64 -y
4.3 创建服务器端运行脚本
- 创建渲染请求的页面脚本：spider.js
// puppeteer is loaded through an explicit path because this script does not
// live in the same directory as the project.
const puppeteer = require('./node_modules/puppeteer');
// Mind the path and the file name of the pool module.
const WSE_LIST = require('./puppeteer-pool.js');

// Resource types that are allowed through; every other request is aborted
// because it does not contribute to the DOM (images, stylesheets, media).
const ALLOWED_RESOURCE_TYPES = ['document', 'script', 'xhr', 'fetch'];

/**
 * Render a URL in one of the pooled browsers and return the resulting HTML.
 *
 * @param {string} url - Address of the page to render.
 * @returns {Promise<string>} The outerHTML of the rendered <html> element.
 * @throws {Error} When the browser pool is still empty; puppeteer errors
 *   (connection failure, navigation timeout, ...) are propagated unchanged.
 */
const spider = async (url) => {
    // The pool module fills WSE_LIST asynchronously, so it can still be empty.
    if (WSE_LIST.length === 0) {
        throw new Error('No browser is available yet: the puppeteer pool is empty');
    }

    // Pick one of the running browsers at random.
    const browserWSEndpoint = WSE_LIST[Math.floor(Math.random() * WSE_LIST.length)];
    const browser = await puppeteer.connect({
        browserWSEndpoint
    });

    let page;
    try {
        // Open a new tab.
        page = await browser.newPage();

        // Intercept network requests and drop the ones that are not needed.
        await page.setRequestInterception(true);
        page.on('request', (req) => {
            if (!ALLOWED_RESOURCE_TYPES.includes(req.resourceType())) {
                return req.abort();
            }
            // Pass through all other requests.
            req.continue();
        });

        // Open the page.
        await page.goto(url, {
            timeout: 20000, // navigation timeout in ms
            waitUntil: 'networkidle0' // an idle network means loading has finished
        });

        // Read the rendered markup from the DOM. The author advises against
        // `await page.content()`: in their tests it returned before the page
        // (the dynamic routes; vue-router is in history mode) had fully loaded,
        // so the source came back incomplete.
        return await page.evaluate(() => {
            return document.getElementsByTagName('html')[0].outerHTML;
        });
    } finally {
        // Always release the tab and the connection, including when
        // navigation or evaluation failed; otherwise they accumulate per request.
        if (page) {
            await page.close().catch((error) => {
                console.error('Failed to close the page:', error);
            });
        }
        // Drops only this connection; the pooled browser keeps running.
        await browser.disconnect();
    }
};

module.exports = spider;
- 创建优化 puppeteer 性能脚本，默认不加载一些多余的功能，提高访问效率：puppeteer-pool.js
const puppeteer = require('./node_modules/puppeteer');

// Number of browsers to start.
const MAX_WSE = 2;
// Collected browserWSEndpoint values, one per started browser. The array is
// exported right away and filled asynchronously by launchBrowsers() below.
const WSE_LIST = [];

/**
 * Build a fresh options object for one puppeteer.launch() call.
 *
 * @returns {object} Launch options: headless mode plus the Chrome flags below.
 */
function buildLaunchOptions() {
    return {
        // Headless mode.
        headless: true,
        // Command-line flags.
        args: [
            '--disable-gpu',
            '--disable-dev-shm-usage',
            '--disable-setuid-sandbox',
            '--no-first-run',
            '--no-sandbox',
            '--no-zygote',
            '--single-process'
        ],
        // Normally not needed; only set this when launching keeps failing
        // because the Chrome executable cannot be found.
        // executablePath: 'path to chrome.exe on your machine, e.g. C:/Program Files/Google/chrome.exe'
    };
}

/**
 * Start MAX_WSE browsers one after another and record each WebSocket endpoint
 * in WSE_LIST, so that callers can spread work across them.
 */
async function launchBrowsers() {
    let started = 0;
    while (started < MAX_WSE) {
        const instance = await puppeteer.launch(buildLaunchOptions());
        const endpoint = await instance.wsEndpoint();
        WSE_LIST.push(endpoint);
        started += 1;
    }
}

// Started but not awaited, exactly like the original immediately-invoked function.
launchBrowsers();

module.exports = WSE_LIST
- 创建服务端启动脚本：service.js
需要和 spider.js 放在一个目录
https://www.xiaoxiaofeng.com 需要替换成你自己的域名
const express = require('./node_modules/express');
const spider = require("./spider.js");
const minify = require('html-minifier').minify;

const app = express();

// Origin of the site that is rendered; replace it with your own domain.
const TARGET_ORIGIN = "https://www.xiaoxiaofeng.com";

// Options handed to html-minifier for every response.
const MINIFY_OPTIONS = {
    removeComments: true,
    collapseWhitespace: true,
    minifyJS: true,
    minifyCSS: true
};

// Render every requested path on TARGET_ORIGIN with the spider and answer
// with the minified HTML.
app.get('*', async (req, res) => {
    const url = `${TARGET_ORIGIN}${req.originalUrl}`;
    console.log('请求的完整URL：' + url);

    let content;
    try {
        content = await spider(url);
    } catch (error) {
        console.log(error);
        // Stop here: the original fell through, minified `undefined` and
        // tried to send a second response.
        res.status(500).send('获取html内容失败');
        return;
    }

    // Compress the markup with html-minifier. If minification throws, log it
    // and serve the rendered HTML unminified rather than dropping the request.
    try {
        content = minify(content, MINIFY_OPTIONS);
    } catch (error) {
        console.log(error);
    }
    res.send(content);
});

app.listen(3000, () => {
    console.log('服务已启动！');
});
- 执行启动 puppeteer 命令
# Start the script created above (service.js) in the background; output goes to nohup.out.
nohup node service.js &
启动成功后，可以通过 `tail -f nohup.out` 查看日志，出现 `服务已启动！` 则代表运行成功，期间可能会出现各式各样的问题，再百度一下吧，这里就不一一列举了。
相当于启动了一个端口为 3000 的 puppeteer 服务。
启动的时候可能端口占用，3000 被占用的话就换一个其他端口。
同时也会在 /root/.cache/puppeteer/chrome/ 下载一个对应版本的谷歌浏览器
这样就可以了，让我们一起来测试一下吧
首先我们先正常的请求，不加请求头，请求结果如下，可以看到是没有渲染的 vue 页面。
PS：莫名其妙不知道优化了啥，速度一下提上来了，百度爬虫从原来的 7s 降到了 ?s（原文此处的数字在转码时丢失），上文中的代码已经是优化后最新的代码。
6. 关于笑小枫
本文到此就结束了，如果帮助到你了，帮忙点个赞👍
🐾我是笑小枫，全网皆可搜的【笑小枫】