Commit ce159a47 by Sxy

Initial commit

parents
node_modules/
package-lock.json
.DS_Store
const Koa = require('koa')
const app = new Koa()
const views = require('koa-views')
const json = require('koa-json')
const onerror = require('koa-onerror')
const bodyparser = require('koa-bodyparser')
const logger = require('koa-logger')
const index = require('./routes/index')
// Application wiring: middleware is registered in the order listed below.
// Install koa-onerror's error handling on the app.
onerror(app)
// middlewares
// Parse request bodies for the JSON, form and text content types.
app.use(bodyparser({
enableTypes: ['json', 'form', 'text']
}))
app.use(json())
app.use(logger())
// Serve static files from ./public (the front-end assets referenced by the view).
app.use(require('koa-static')(__dirname + '/public'))
// Make ctx.render available, resolving templates from ./views with the .html extension.
app.use(views(__dirname + '/views', {
extension: 'html'
}))
// logger
// Time everything downstream of this middleware and print "<METHOD> <url> - <ms>ms".
app.use(async (ctx, next) => {
const start = new Date()
await next()
const ms = new Date() - start
console.log(`${ctx.method} ${ctx.url} - ${ms}ms`)
})
// routes
app.use(index.routes(), index.allowedMethods())
// error-handling
// Log every 'error' event emitted on the app together with the request context.
app.on('error', (err, ctx) => {
console.error('server error', err, ctx)
});
// Export the configured app; bin/www wraps app.callback() in an HTTP server.
module.exports = app
#!/usr/bin/env node
/**
* Module dependencies.
*/
var app = require('../app');
var debug = require('debug')('demo:server');
var http = require('http');
/**
* Get port from the PORT environment variable, defaulting to '3000'.
*/
var port = normalizePort(process.env.PORT || '3000');
// NOTE(review): the line below is an Express-only API left over from the
// generator template; the Koa app has no equivalent, so it stays disabled.
// app.set('port', port);
/**
* Create HTTP server around the Koa request handler.
*/
var server = http.createServer(app.callback());
/**
* Listen on provided port, on all network interfaces.
*/
server.listen(port);
// onError turns listen failures into friendly messages; onListening logs the bound address.
server.on('error', onError);
server.on('listening', onListening);
/**
 * Turn a raw port setting into the value passed to `server.listen`.
 *
 * @param {string|number} val - Raw setting, e.g. the PORT environment variable.
 * @returns {number|string|false} The parsed number when `val` starts with a
 *   non-negative integer, `val` itself when it is not numeric (treated as a
 *   named pipe), or `false` when it parses to a negative number.
 */
function normalizePort(val) {
  const parsed = parseInt(val, 10);

  // Not numeric at all: hand the value back untouched as a named pipe.
  if (isNaN(parsed)) {
    return val;
  }

  // Numeric: only non-negative values are usable port numbers.
  return parsed >= 0 ? parsed : false;
}
/**
 * Handler for the HTTP server's "error" event.
 *
 * Errors that did not come from the `listen` syscall, and listen errors with
 * an unrecognised code, are rethrown unchanged. The two recognised listen
 * failures print a friendly message and terminate the process with code 1.
 *
 * @param {Error & {syscall?: string, code?: string}} error - Error emitted by the server.
 */
function onError(error) {
  if (error.syscall !== 'listen') {
    throw error;
  }

  // `port` is the module-level value produced by normalizePort.
  const label = typeof port === 'string' ? 'Pipe ' : 'Port ';
  const bind = label + port;

  // Friendly message suffix for each listen error handled specially.
  const suffixByCode = new Map([
    ['EACCES', ' requires elevated privileges'],
    ['EADDRINUSE', ' is already in use'],
  ]);

  if (!suffixByCode.has(error.code)) {
    throw error;
  }

  console.error(bind + suffixByCode.get(error.code));
  process.exit(1);
}
/**
 * Handler for the HTTP server's "listening" event: writes the bound pipe or
 * port to the debug logger.
 */
function onListening() {
  const address = server.address();
  // A string address is reported as a pipe; otherwise the address object's port is reported.
  const target = typeof address === 'string'
    ? `pipe ${address}`
    : `port ${address.port}`;
  debug(`Listening on ${target}`);
}
const { Client } = require('@elastic/elasticsearch')
// Shared Elasticsearch client, created once and exported for every module that requires this file.
module.exports = new Client({
// Hard-coded node address of the Elasticsearch instance.
node: "http://192.168.18.102:9200/",
// NOTE(review): the disabled block below keeps a username/password in source
// control; prefer loading credentials from the environment and rotating these.
// auth: {
// username: 'admines',
// password: 'adminGSBes'
// },
maxRetries: 2,
// Request timeout in milliseconds (10 seconds).
requestTimeout: 1000 * 10,
})
\ No newline at end of file
const mongoose = require('mongoose');
// Connection settings: hard-coded URI of the "trademark" MongoDB database.
const config = {
dbPath: "mongodb://192.168.18.101:27017/trademark"
};
// Enable mongoose's debug option.
mongoose.set('debug', true)
// Open the default connection as soon as this module is first required.
mongoose.connect(config.dbPath, {
useNewUrlParser: true,
useUnifiedTopology: true,
useCreateIndex: true
}, function (err) {
if (err) {
// The initial connection failed: log the reason and exit with status 1.
console.error(` MongoDB 连接失败 ${err.message} `);
process.exit(1);
}
})
// NOTE(review): this 'disconnected' listener is empty, so disconnects go unreported — confirm that is intended.
mongoose.connection.on('disconnected', () => {
})
// Log connection-level errors without terminating the process.
mongoose.connection.on('error', err => {
console.error(err)
})
// Announce a successful connection together with the URI that was used.
mongoose.connection.on('open', async () => {
console.log('Connected to MongoDB ', config.dbPath)
})
// Export the configured mongoose instance so schemas and models share this connection.
module.exports = mongoose
\ No newline at end of file
const mongoose = require('../client/mongoClient');
const nameSchema = require("./name")
// Compile the name schema into a model registered as 'Name' and export it.
exports.Name = mongoose.model('Name', nameSchema);
\ No newline at end of file
const mongoose = require('../client/mongoClient');
// Schema for one trademark name document: a single string field `name`
// declared with `unique: true`.
const nameSchema = new mongoose.Schema({
name: { type: String, unique: true }
})
// Export the schema itself; the model is created from it in ./index.
module.exports = nameSchema;
{
"name": "trademarksimilar",
"version": "1.0.0",
"description": "",
"main": "index.js",
"scripts": {
"start": "node bin/www",
"test": "echo \"Error: no test specified\" && exit 1"
},
"author": "",
"license": "ISC",
"dependencies": {
"@elastic/elasticsearch": "^7.14.0",
"cheerio": "^1.0.0-rc.10",
"mongoose": "^5.13.7",
"puppeteer": "^10.2.0",
"koa": "^2.7.0",
"koa-bodyparser": "^4.2.1",
"koa-convert": "^1.2.0",
"koa-json": "^2.0.2",
"koa-logger": "^3.2.0",
"koa-onerror": "^4.1.0",
"koa-router": "^7.4.0",
"koa-static": "^5.0.0",
"koa-views": "^6.2.0",
"axios": "^0.21.1"
}
}
\ No newline at end of file
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
const router = require('koa-router')()
const start = require("../start")
const es = require("../client/esclient");
// GET / — render the 'index' view (the search page).
router.get('/', async (ctx, next) => {
await ctx.render('index')
})
// Upper bound for from + size. Elasticsearch's index.max_result_window defaults
// to 10000. NOTE(review): adjust if the "test4" index overrides that setting.
const MAX_RESULT_WINDOW = 10000

/**
 * Coerce a client-supplied paging value to a positive integer.
 *
 * @param {*} value - Raw value taken from the request body.
 * @param {number} fallback - Returned when `value` is missing, non-numeric or < 1.
 * @returns {number} A positive integer.
 */
function toPositiveInt(value, fallback) {
  const parsed = parseInt(value, 10)
  return parsed > 0 ? parsed : fallback
}

/**
 * POST /list — search trademark names similar to `name`.
 *
 * Request body: `{ page, limit, conditions, name }`. `page` is 1-based and
 * `limit` is the page size; missing or invalid values fall back to 1 and 15
 * (15 is the page size the bundled front end starts with).
 * Responds with `{ list: [{ name }], count, query }`, or with HTTP 400 when
 * the requested page lies beyond the Elasticsearch result window.
 */
router.post('/list', async (ctx) => {
  const { page, limit, conditions, name } = ctx.request.body || {}

  // The body is untrusted: without coercion `from` could become NaN or negative.
  const size = Math.min(toPositiveInt(limit, 15), MAX_RESULT_WINDOW)
  const from = (toPositiveInt(page, 1) - 1) * size
  if (from + size > MAX_RESULT_WINDOW) {
    ctx.status = 400
    ctx.body = { message: `page * limit must not exceed ${MAX_RESULT_WINDOW}` }
    return
  }

  // `start` builds the list of "should" clauses for the selected similarity rules.
  const should = await start(name, conditions)
  const query = {
    query: {
      bool: {
        should
      }
    },
    "from": from,
    "size": size,
  }
  console.log(JSON.stringify(query))

  const data = await es.search({
    index: "test4",
    body: query
  })
  const { body: { hits: { total, hits } } } = data

  ctx.body = {
    list: hits.map(item => ({ name: item._source.name })),
    count: total.value,
    // Echo the generated query back to the caller, as the original handler did.
    query
  }
})
module.exports = router
This diff is collapsed. Click to expand it.
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE properties SYSTEM "http://java.sun.com/dtd/properties.dtd">
<properties>
<comment>IK Analyzer 扩展配置</comment>
<!-- Users can configure their own extension dictionary here -->
<entry key="ext_dict">kuozhanci.dic</entry>
<!-- Users can configure their own extension stop-word dictionary here -->
<!-- <entry key="ext_stopwords">tingyongci.dic</entry> -->
<!-- Users can configure a remote extension dictionary here -->
<!-- <entry key="remote_ext_dict">words_location</entry> -->
<!-- Users can configure a remote extension stop-word dictionary here -->
<!-- <entry key="remote_ext_stopwords">words_location</entry> -->
</properties>
\ No newline at end of file
// 反反爬设置
const puppeteer = require('puppeteer');
/**
 * Install anti-bot-detection shims on a Puppeteer page.
 *
 * Registers a script that runs in every new document before the page's own
 * scripts, then sets the viewport to 1366x1024.
 *
 * @param {object} page - Puppeteer page; only `evaluateOnNewDocument` and
 *   `setViewport` are used.
 * @returns {Promise<void>}
 */
async function anti_detection(page) {
  // The callback is executed inside the browser, so it may only reference browser globals.
  await page.evaluateOnNewDocument(() => {
    // Remove the navigator.webdriver field from navigator's prototype.
    const newProto = navigator.__proto__;
    delete newProto.webdriver;
    navigator.__proto__ = newProto;

    // Add a window.chrome object; it is filled with placeholder members to look more realistic.
    window.chrome = {};
    window.chrome.app = { "InstallState": "hehe", "RunningState": "haha", "getDetails": "xixi", "getIsInstalled": "ohno" };
    window.chrome.csi = function () { };
    window.chrome.loadTimes = function () { };
    window.chrome.runtime = function () { };

    // The headless user agent contains "headless", so override it.
    Object.defineProperty(navigator, 'userAgent', {
      get: () => "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.113 Safari/537.36",
    });

    // Report a realistic-looking plugin list.
    Object.defineProperty(navigator, 'plugins', {
      get: () => [{
        "description": "Portable Document Format",
        "filename": "internal-pdf-viewer",
        "length": 1,
        "name": "Chrome PDF Plugin"
      }]
    });

    // Report a language list.
    Object.defineProperty(navigator, 'languages', {
      get: () => ["zh-CN", "zh", "en"],
    });

    // Answer notification permission queries from Notification.permission and
    // delegate everything else to the native implementation. The native method
    // must be invoked with `permissions` as its receiver; calling it detached
    // throws "Illegal invocation".
    const permissions = window.navigator.permissions;
    const originalQuery = permissions.query;
    permissions.query = (parameters) => (
      parameters.name === 'notifications'
        ? Promise.resolve({ state: Notification.permission })
        : originalQuery.call(permissions, parameters)
    );
  });

  await page.setViewport({
    width: 1366,
    height: 1024,
    deviceScaleFactor: 1
  });
}
module.exports = { anti_detection }
\ No newline at end of file
{
"settings": {
"number_of_shards": 1,
"number_of_replicas": 0,
"analysis": {
"analyzer": {
"shouzimu_pinyin": {
"tokenizer": "shouzimu_pinyin"
},
"all_pinyin": {
"tokenizer": "all_pinyin"
},
"xingjinzi": {
"char_filter": [
"fantizi"
],
"tokenizer": "standard",
"filter": [
"lowercase",
"xingjinzi"
]
},
"tongyici": {
"char_filter": [
"fantizi"
],
"tokenizer": "ik_max_word",
"filter": [
"lowercase",
"tongyici"
]
},
"jingque": {
"char_filter": [
"fantizi"
],
"tokenizer": "keyword",
"filter": [
"lowercase"
]
},
"mypattern": {
"char_filter": [
"fantizi"
],
"tokenizer": "mypattern",
"filter": [
"lowercase"
]
}
},
"filter": {
"xingjinzi": {
"type": "synonym",
"synonyms_path": "xingjinzi.txt"
},
"tongyici": {
"type": "synonym",
"synonyms_path": "tongyici.txt"
}
},
"tokenizer": {
"shouzimu_pinyin": {
"type": "pinyin",
"keep_first_letter": true,
"keep_full_pinyin": false
},
"all_pinyin": {
"type": "pinyin",
"keep_first_letter": false,
"keep_joined_full_pinyin": true,
"keep_none_chinese_in_joined_full_pinyin": true
},
"mypattern": {
"type": "pattern",
"pattern": ","
}
},
"char_filter": {
"fantizi": {
"type": "stconvert",
"convert_type": "t2s"
}
}
}
},
"mappings": {
"properties": {
"name": {
"type": "keyword",
"fields": {
"standard": {
"type": "text",
"analyzer": "standard",
"search_analyzer": "standard"
},
"jingque": {
"type": "text",
"analyzer": "jingque",
"search_analyzer": "jingque"
},
"english": {
"type": "text",
"analyzer": "english",
"search_analyzer": "english"
},
"all_pinyin": {
"type": "text",
"analyzer": "all_pinyin",
"search_analyzer": "all_pinyin"
},
"xingjinzi": {
"type": "text",
"analyzer": "xingjinzi",
"search_analyzer": "xingjinzi"
},
"tongyici": {
"type": "text",
"analyzer": "tongyici",
"search_analyzer": "tongyici"
}
}
},
"length": {
"type": "integer"
},
"cnlength": {
"type": "integer"
},
"enlength": {
"type": "integer"
},
"otherlength": {
"type": "integer"
}
}
}
}
\ No newline at end of file
const es = require("../client/esclient");
const { Name } = require("../model/index")
// 获取所有商标名
/**
 * Copy every non-blank trademark name from the "bigdata_middle_gsb_tm_info"
 * Elasticsearch index into the MongoDB `Name` collection.
 *
 * Pages through the index 10000 hits at a time using `search_after` on `_id`,
 * skipping documents whose `tm_name` is exactly "图形". Insert errors for a
 * page are logged and the export continues. A failing page is retried, but
 * after several consecutive failures the export stops instead of looping
 * forever on the same cursor.
 *
 * @returns {Promise<void>}
 */
async function start() {
  // Hard-coded starting cursor; presumably the resume point of an earlier run — TODO confirm.
  let search_after = ['3.3490871E7']
  const MAX_CONSECUTIVE_FAILURES = 5
  let consecutiveFailures = 0

  while (search_after && search_after.length > 0) {
    try {
      console.log(search_after)
      const data = await es.search({
        index: "bigdata_middle_gsb_tm_info",
        body: {
          "query": {
            "bool": {
              "must": [
                {
                  "exists": {
                    "field": "tm_name"
                  }
                }
              ],
              "must_not": [
                {
                  "term": {
                    "tm_name": "图形"
                  }
                }
              ]
            }
          },
          "_source": ["tm_name", "_id"],
          "search_after": search_after,
          "size": 10000,
          "sort": [
            {
              "_id": "asc"
            }
          ]
        }
      })
      const { body: { hits: { hits } } } = data

      const docs = []
      for (const { _source, sort } of hits) {
        const tmName = _source ? _source.tm_name : undefined
        // A non-string tm_name used to throw on .trim(), which made the loop
        // retry the same page forever; such hits are now skipped.
        if (typeof tmName === 'string' && tmName.trim()) {
          console.log(tmName, sort)
          docs.push({ name: tmName })
        }
      }

      if (docs.length > 0) {
        try {
          // ordered: false lets the remaining documents be inserted when one fails.
          await Name.insertMany(docs, { ordered: false })
        } catch (err) {
          console.log(err)
        }
      }

      if (hits.length <= 0) {
        // An empty page means the index has been fully scanned.
        search_after = false
      } else {
        // Continue after the sort key of the last hit on this page.
        search_after = hits[hits.length - 1].sort
        console.log(search_after)
      }
      consecutiveFailures = 0
    } catch (err) {
      console.log(err)
      consecutiveFailures += 1
      if (consecutiveFailures >= MAX_CONSECUTIVE_FAILURES) {
        console.error(`giving up after ${consecutiveFailures} consecutive failures at cursor ${JSON.stringify(search_after)}`)
        return
      }
    }
  }
}
start()
\ No newline at end of file
const { Name } = require("../model/index");
const es = require("../client/esclient");
/**
* 往 es中 洗数据
*/
/**
 * Load trademark names from MongoDB into the "test3" Elasticsearch index.
 *
 * Reads `Name` documents in `_id` order, 10000 at a time, starting after a
 * hard-coded `_id`, and bulk-indexes each name together with its total,
 * Chinese, English and "other" character counts. A failing batch is retried,
 * but after several consecutive failures the load stops instead of looping
 * forever on the same batch.
 *
 * @returns {Promise<void>}
 */
async function start() {
  // Hard-coded starting point; presumably the resume position of an earlier run — TODO confirm.
  let where = {
    _id: {
      $gt: "611d4f9895c286d09c699513"
    }
  }
  const MAX_CONSECUTIVE_FAILURES = 5
  let consecutiveFailures = 0
  let i = 0

  while (where) {
    try {
      const data = await Name.find(where).sort({ _id: 1 }).limit(10000)
      if (data.length > 0) {
        const array = []
        console.log(i++)
        console.log("1:", data[0]._id)
        for (const val of data) {
          const name = val.name.trim()
          const cnlength = name.replace(/[^\u4E00-\u9FA5]/g, '').length
          const enlength = name.replace(/[^a-zA-Z]/g, '').length
          // The bulk body alternates an action line with the document to index.
          array.push({ "index": { "_index": "test3" } }, {
            name,
            length: name.length,
            cnlength,
            enlength,
            // Whatever is neither whitespace, Chinese nor an ASCII letter.
            otherlength: name.replace(/\s*/g, "").length - cnlength - enlength
          })
        }
        const response = await es.bulk({
          index: "test3",
          body: array
        })
        // A bulk call can succeed while individual items fail; surface that
        // instead of dropping documents silently.
        if (response && response.body && response.body.errors) {
          console.error("bulk request reported item errors for batch starting at", data[0]._id)
        }
        console.log("2:", data[data.length - 1]._id)
        // Next batch starts after the last _id that was just indexed.
        where = {
          _id: {
            $gt: data[data.length - 1]._id
          }
        }
      } else {
        where = false
      }
      consecutiveFailures = 0
    } catch (err) {
      console.log(err)
      consecutiveFailures += 1
      if (consecutiveFailures >= MAX_CONSECUTIVE_FAILURES) {
        console.error(`giving up after ${consecutiveFailures} consecutive failures`)
        return
      }
    }
  }
  // Was misspelled `cosnole`, which threw a ReferenceError after the load finished.
  console.log("结束")
}
start()
永远的神
学编程
\ No newline at end of file
# 精确查询 (繁体字【阿裏巴巴】、大小写【alibaBa】)
POST /test2/_search
{
"query": {
"bool": {
"must": [
{
"match": {
"name.jingque": "阿裏巴巴"
}
}
]
}
},
"size": 10000
}
# 前后缀搜索 (wildcard)
POST /test2/_search
{
"query": {
"bool": {
"must": [
{
"match": {
"name.standard":{
"query": "红米",
"operator": "and"
}
}
},
{
"wildcard": {
"name.jingque": {
"value": "*红米"
}
}
}
],
"must_not": [
{
"term": {
"name.jingque": {
"value": "红米"
}
}
}
]
}
},
"size": 10000
}
# 减字
POST /test2/_search
{
"query": {
"bool": {
"must": [
{
"match": {
"name.standard": {
"query": "红浪漫",
"minimum_should_match": 2
}
}
},
{
"script": {
"script": "doc['name'].value.length() === 2 "
}
}
],
"must_not": [
{
"term": {
"name.jingque": {
"value": "红浪漫"
}
}
}
]
}
},
"size": 10000
}
#读音相同
POST /test2/_search
{
"query": {
"bool": {
"must": [
{
"script": {
"script": "doc['name'].value.length() === 3 "
}
},
{
"match_phrase": {
"name.all_pinyin": "红浪漫"
}
}
],
"must_not": [
{
"term": {
"name.jingque": {
"value": "红浪漫"
}
}
}
]
}
},
"size": 2000
}
#形近字 (土星、士星)
POST /test2/_search
{
"query": {
"bool": {
"must": [
{
"script": {
"script": "doc['name'].value.length() === 2 "
}
},
{
"match": {
"name.standard": {
"query": "土星",
"minimum_should_match": 1
}
}
},
{
"match_phrase": {
"name.standard": {
"analyzer": "xingjinzi",
"query": "土星"
}
}
}
],
"must_not": [
{
"term": {
"name.jingque": {
"value": "土星"
}
}
}
]
}
},
"size": 10000
}
#近似词 (巧媳妇,巧老婆)
POST /test2/_search
{
"query": {
"bool": {
"must": [
{
"script": {
"script": "doc['name'].value.length() === 3 "
}
},
{
"match_phrase": {
"name.tongyici":{
"query": "巧媳妇"
}
}
}
],
"must_not": [
{
"term": {
"name.jingque": {
"value": "巧媳妇"
}
}
},{
"wildcard": {
"name.jingque": {
"value": "*巧媳妇*"
}
}
}
]
}
},
"size": 10000
}
\ No newline at end of file
const puppeteer = require('puppeteer');
const cheerio = require('cheerio');
const commonUtil = require("./common");
const fs = require("fs")
const os = require("os")
let fileName = "tongyici.txt"
// 获取同义词
/**
 * Scrape the synonym list pages with a visible (non-headless) Chromium and
 * pass each page's HTML to `buildListData`, which appends the entries to the
 * synonym file.
 *
 * The browser is always closed when scraping ends, whether it finished or
 * failed, so the process does not hang on a leftover browser.
 *
 * @returns {Promise<void>} Rejects with the first navigation/scraping error.
 */
async function start() {
  const browser = await puppeteer.launch({
    headless: false,
    slowMo: 100,
    ignoreDefaultArgs: ['--enable-automation'],
    args: [
      '--no-sandbox',
      '--disable-setuid-sandbox',
      '--disable-blink-features=AutomationControlled',
    ],
    dumpio: false,
  });

  try {
    const page = await browser.newPage();
    await commonUtil.anti_detection(page);
    // Overrides the viewport that anti_detection has just configured.
    await page.setViewport({
      width: 1920,
      height: 1080
    });
    // Hard-coded start URL (page=219); presumably a resume point — TODO confirm.
    await page.goto("https://xuewen.cnki.net/DetailList.aspx?code=&key=&bid=R201109323&cid=&type=&sort=0&otype=0&page=219");

    // Crawl the list pages one after another.
    let active = true;
    while (active) {
      await page.waitForSelector('.searchResultIfm');
      const html = await page.content();
      // Parse the HTML and append its entries to the output file.
      buildListData(html)
      // Keep paging while the '#apage_end' element is present.
      // NOTE(review): presumably it disappears on the last page — confirm.
      active = await page.$('#apage_end');
      if (active) {
        await page.click('#apage_next');
        await page.waitForTimeout(1000);
      }
    }
  } finally {
    await browser.close();
  }
}
/**
 * Extract the link text of every `.title` element from a list page and append
 * it to the synonym file, one entry per line.
 *
 * Each link text is trimmed and its "——" separators are replaced by commas;
 * the resulting line is logged and appended to `fileName`.
 *
 * @param {string} [html=''] - Full HTML of one list page.
 * @returns {Array} Always an empty array; nothing is ever collected into it.
 */
function buildListData(html = '') {
  const $ = cheerio.load(html);
  const result = [];

  $(".title").each((_titleIndex, titleEl) => {
    $(titleEl).find('a').each((_linkIndex, linkEl) => {
      const line = $(linkEl).text().trim().split("——").join(",");
      console.log(line);
      fs.appendFileSync(fileName, line + os.EOL);
    });
  });

  return result;
}
start()
\ No newline at end of file
This source diff could not be displayed because it is too large. You can view the blob instead.
const es = require("./client/esclient");
const fs = require("fs")
const os = require("os")
// 获取形近字
/**
 * Export the look-alike character groups from the "bigdata_similar_filter"
 * index into xingjinzi.txt.
 *
 * Fetches up to 2000 documents with a match_all query, joins each document's
 * `name` list with commas, logs the line and appends it to the file.
 *
 * @returns {Promise<void>}
 */
async function start() {
  const request = {
    index: "bigdata_similar_filter",
    body: {
      "query": {
        "match_all": {}
      },
      "size": 2000
    }
  };
  const response = await es.search(request);
  const groups = response.body.hits.hits;
  const fileName = "xingjinzi.txt";

  for (const { _source } of groups) {
    const line = _source.name.join(",");
    console.log(line);
    fs.appendFileSync(fileName, line + os.EOL);
  }
}
start()
\ No newline at end of file
<!DOCTYPE html>
<html>
<head>
<meta charset="UTF-8">
<!-- import CSS -->
<link rel="stylesheet" href="https://unpkg.com/element-ui/lib/theme-chalk/index.css">
<style>
a {
color: #00A4FF;
text-decoration: none;
outline: none;
-webkit-tap-highlight-color: transparent;
}
a:link,
a:visited {
text-decoration: none;
/*超链接无下划线*/
}
a:hover {
text-decoration: none;
/* links stay un-underlined on hover as well */
}
</style>
</head>
<body>
<div id="app">
<el-form :inline="true" :model="formInline" class="demo-form-inline">
<el-form-item label="">
<el-input v-model="formInline.name" placeholder="商标名"></el-input>
</el-form-item>
<el-form-item>
<el-button type="primary" @click="onSubmit">查询</el-button>
</el-form-item>
</el-form>
<el-checkbox :indeterminate="isIndeterminate" v-model="checkAll" @change="handleCheckAllChange">全选</el-checkbox>
<div style="margin: 5px 0;"></div>
<el-checkbox-group v-model="checkedConditions" @change="handleCheckedCitiesChange">
<el-checkbox style="display:block;" v-for="(city,index) in conditions" :label="index+1" :key="index">
{{city}}
<div style="margin: 5px 0;"></div>
</el-checkbox>
</el-checkbox-group>
<el-table :data="tableData" border style="width: 100%" height="800" v-loading="loading">
<el-table-column fixed prop="name" label="商标名">
</el-table-column>
</el-table>
<div style="text-align: center;margin-top: 15px;">
<el-pagination @size-change="handleSizeChange" @current-change="handleCurrentChange" :current-page="page"
:page-sizes="[15,30,50,100,200]" :page-size="limit" layout="total, sizes, prev, pager, next"
:total="total">
</el-pagination>
</div>
</div>
</body>
<!-- import Vue before Element -->
<script src="vue.js"></script>
<!-- import JavaScript -->
<script src="index.js"></script>
<script src="axios.min.js"></script>
<script>
// Labels of the similarity rules shown as checkboxes. A rule is identified by
// its 1-based position in this list; that number is what gets sent to the
// server in `conditions`, so the order must not change.
const options = [
"1. 完全相同",
"2. 完全相同加英文/图形符号,如:小米与小米A或A小米",
"3. 倒序或倒序加英文/图形符号,如:小米与米小,全聚德与德聚全;小米与米小A或A米小;",
"4.同音不同字或同音不同字加英文/图形符号,如:华为与铧为,华为与铧为A或A铧为;(同词加字母的组合展示)",
"5. 同音不同字倒序或同音不同字倒序加英文/图形符号,如:华为与为铧或与为铧A或A为铧",
"6. 完全相同后缀加字/词近似,如:小米与小米智慧或小米米等;",
"7. 完全相同前缀加字/词近似,如:小米与智慧小米或米小米等;",
"8. 倒序后缀加字/词近似,如:华为与为华智慧或为华大等;",
"9. 倒序前缀加字/词近似,如:华为与智慧为华或大为华等;",
"10. 相同词中加虚词或相同词中加虚词加英文/图形符号,如:小米与小之米或小之米A;",
"11. 商标文字含义相同/近似(也包含拼音/英文)",
"12. 同字形不同音或同字形不同音加英文/图形符号,如:华为与毕为或毕为A;",
"13. 相同减字/词或相同减字/词加英文/图形符号,如:全聚德与全德或全德A;"
];
// Root view model of the trademark similarity search page.
new Vue({
  el: '#app',
  data: function () {
    return {
      // Search form state.
      formInline: {
        name: ""
      },
      // Rows of the current result page and the total hit count.
      tableData: [],
      total: 0,
      // Paging state: page size and 1-based current page.
      limit: 15,
      page: 1,
      loading: false,
      // Every rule starts checked, so "select all" starts checked and not indeterminate.
      checkAll: true,
      // Selected rules, identified by their 1-based position in `options`.
      checkedConditions: options.map((item, index) => index + 1),
      conditions: options,
      isIndeterminate: false
    }
  },
  methods: {
    /**
     * Query POST /list and show the returned page.
     * Requires a trademark name and at least one selected rule; otherwise the
     * user is alerted and no request is sent.
     *
     * @param {number} [page=this.page] - 1-based page to request.
     * @param {number} [limit=this.limit] - Page size to request.
     */
    async initData(page = this.page, limit = this.limit) {
      if (!this.formInline.name) {
        alert("请填写商标名")
        return
      }
      if (this.checkedConditions.length === 0) {
        alert("请勾选条件")
        return
      }

      this.loading = true;
      try {
        const response = await axios.post(`/list`, {
          page,
          limit,
          conditions: this.checkedConditions,
          name: this.formInline.name
        });
        const data = response.data;
        this.tableData = data.list;
        this.total = data.count;
      } catch (err) {
        alert(err.message)
      } finally {
        // Always clear the spinner; it used to stay on forever after a failed request.
        this.loading = false;
      }
    },
    // Page size changed in the pager.
    handleSizeChange(val) {
      this.limit = val;
      this.initData()
    },
    // Current page changed in the pager.
    handleCurrentChange(val) {
      this.page = val;
      this.initData()
    },
    // "Search" button: a new search starts from the first page, otherwise a
    // stale page number could point past the end of the new result set.
    onSubmit() {
      this.page = 1;
      this.initData()
    },
    // "Select all" toggled: check every rule or none.
    handleCheckAllChange(val) {
      this.checkedConditions = val ? options.map((item, index) => index + 1) : [];
      this.isIndeterminate = false;
    },
    // A single rule toggled: derive the "select all" state from the selection size.
    handleCheckedCitiesChange(value) {
      const checkedCount = value.length;
      this.checkAll = checkedCount === this.conditions.length;
      this.isIndeterminate = checkedCount > 0 && checkedCount < this.conditions.length;
    }
  },
  mounted() {}
})
</script>
</html>
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment