-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy path21_Full_Text_Search.sql
More file actions
284 lines (261 loc) · 9.56 KB
/
21_Full_Text_Search.sql
File metadata and controls
284 lines (261 loc) · 9.56 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
/**************************************************************
* MySQL 8.0 Full-Text Search Tutorial
* This script demonstrates full-text search in MySQL
* using FULLTEXT indexes and the MATCH...AGAINST syntax.
* It covers:
* - Creating tables with FULLTEXT indexes on InnoDB.
* - Inserting a sample document dataset.
* - Natural language mode searches with relevance ranking.
* - Boolean mode searches with operators (+, -, *, ~, "").
* - Query expansion mode for broader result sets.
* - Stopwords, minimum word length, and parser settings.
* - Adding FULLTEXT indexes to existing tables.
* - Performance and maintenance notes.
**************************************************************/
-------------------------------------------------
-- Region: 0. Initialization
-------------------------------------------------
/*
Use the course database.
*/
USE mysql_course;
DROP TABLE IF EXISTS documents;
-------------------------------------------------
-- Region: 1. Creating the Table with a FULLTEXT Index
-------------------------------------------------
/*
1.1 Create a documents table with a FULLTEXT index on title and body.
InnoDB supports FULLTEXT indexes from MySQL 5.6.
The index covers both columns so a single MATCH expression can
search across them.
*/
CREATE TABLE documents
(
document_id INT PRIMARY KEY AUTO_INCREMENT,
title VARCHAR(255) NOT NULL,
body TEXT NOT NULL,
file_type VARCHAR(10) NOT NULL DEFAULT 'txt',
security_level INT NOT NULL DEFAULT 1,
indexed_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP,
FULLTEXT INDEX ft_documents (title, body)
) ENGINE = InnoDB;
-------------------------------------------------
-- Region: 2. Inserting Sample Data
-------------------------------------------------
/*
2.1 Insert a small representative dataset.
In production you would load thousands of rows; the patterns
demonstrated here scale to large tables.
*/
INSERT INTO documents (title, body, file_type)
VALUES
('Introduction to MySQL',
'MySQL is a popular open-source relational database management system. '
'It supports full-text search on InnoDB and MyISAM storage engines.',
'pdf'),
('MySQL Performance Tuning',
'Performance tuning in MySQL involves index design, query optimization, '
'InnoDB buffer pool sizing, and slow query log analysis.',
'pdf'),
('Full-Text Search Capabilities',
'MySQL full-text search supports natural language mode, boolean mode, '
'and query expansion. The MATCH...AGAINST syntax drives all three modes.',
'docx'),
('Database Management Best Practices',
'Best practices for database management include regular backups, '
'index maintenance, statistics updates, and capacity planning.',
'pdf'),
('MySQL 8.0 New Features',
'MySQL 8.0 introduces window functions, CTEs, invisible indexes, '
'descending indexes, and improved JSON support.',
'docx'),
('Data Analysis with SQL',
'SQL enables powerful data analysis using aggregation, window functions, '
'subqueries, and set operations across relational tables.',
'txt'),
('MySQL Security Hardening',
'Securing MySQL requires strong authentication, encrypted connections, '
'role-based privileges, and regular audit log reviews.',
'pdf'),
('Index Design Patterns',
'Composite indexes, covering indexes, functional indexes, and prefix '
'indexes are the main patterns for MySQL performance optimization.',
'docx');
-------------------------------------------------
-- Region: 3. Natural Language Mode Search
-------------------------------------------------
/*
3.1 Default MATCH...AGAINST uses IN NATURAL LANGUAGE MODE.
MySQL scores each row by relevance; higher score = better match.
Rows below the relevance threshold are filtered automatically.
*/
SELECT
document_id,
title,
ROUND(MATCH(title, body) AGAINST ('MySQL database management'), 4) AS relevance_score
FROM documents
WHERE MATCH(title, body) AGAINST ('MySQL database management')
ORDER BY relevance_score DESC;
/*
3.2 Show documents most relevant to full-text search topics.
*/
SELECT
document_id,
title,
ROUND(MATCH(title, body) AGAINST ('full-text search capabilities'), 4) AS relevance_score
FROM documents
WHERE MATCH(title, body) AGAINST ('full-text search capabilities')
ORDER BY relevance_score DESC;
-------------------------------------------------
-- Region: 4. Boolean Mode Search
-------------------------------------------------
/*
4.1 Boolean mode gives you explicit operators to control inclusion,
exclusion, and weighting:
+word word must be present
-word word must not be present
word* prefix wildcard
"phrase" exact phrase match
~word lower relevance if word present
>word increase relevance weight
<word decrease relevance weight
*/
/*
4.2 Require "MySQL" and exclude documents mentioning "security".
*/
SELECT
document_id,
title
FROM documents
WHERE MATCH(title, body) AGAINST ('+MySQL -security' IN BOOLEAN MODE);
/*
4.3 Exact phrase search.
*/
SELECT
document_id,
title
FROM documents
WHERE MATCH(title, body) AGAINST ('"full-text search"' IN BOOLEAN MODE);
/*
4.4 Prefix wildcard: match any word starting with "optim".
*/
SELECT
document_id,
title
FROM documents
WHERE MATCH(title, body) AGAINST ('optim*' IN BOOLEAN MODE);
/*
4.5 Boost relevance of documents that mention "performance".
*/
SELECT
document_id,
title,
ROUND(MATCH(title, body) AGAINST ('+MySQL >performance' IN BOOLEAN MODE), 4) AS score
FROM documents
WHERE MATCH(title, body) AGAINST ('+MySQL >performance' IN BOOLEAN MODE)
ORDER BY score DESC;
-------------------------------------------------
-- Region: 5. Query Expansion Mode
-------------------------------------------------
/*
5.1 WITH QUERY EXPANSION performs a two-step search:
first pass ranks results by the original query; second pass
expands the query with the most relevant words found in those
results and re-ranks. Useful when a search phrase is very short.
*/
SELECT
document_id,
title,
ROUND(MATCH(title, body) AGAINST ('database' WITH QUERY EXPANSION), 4) AS relevance_score
FROM documents
WHERE MATCH(title, body) AGAINST ('database' WITH QUERY EXPANSION)
ORDER BY relevance_score DESC;
-------------------------------------------------
-- Region: 6. Adding a FULLTEXT Index to an Existing Table
-------------------------------------------------
/*
6.1 Drop-and-recreate approach for demonstration.
ALTER TABLE is the standard way to add a FULLTEXT index post hoc.
*/
DROP TABLE IF EXISTS articles;
CREATE TABLE articles
(
article_id INT PRIMARY KEY AUTO_INCREMENT,
headline VARCHAR(255) NOT NULL,
content MEDIUMTEXT NOT NULL,
published DATE NOT NULL
) ENGINE = InnoDB;
INSERT INTO articles (headline, content, published)
VALUES
('MySQL 8.0 Released',
'The MySQL team announced version 8.0 with major improvements to '
'performance, security, and developer-facing features.',
'2018-04-19'),
('InnoDB Full-Text Improvements',
'InnoDB now supports full-text search with the ngram and MeCab parsers '
'for CJK and Japanese languages respectively.',
'2020-06-01');
/*
6.2 Add FULLTEXT index after table creation.
For large tables this operation runs online (no blocking DML in 8.0).
*/
ALTER TABLE articles ADD FULLTEXT INDEX ft_articles (headline, content);
/*
6.3 Verify by running a search on the new index.
*/
SELECT article_id, headline
FROM articles
WHERE MATCH(headline, content) AGAINST ('InnoDB full-text' IN BOOLEAN MODE);
-------------------------------------------------
-- Region: 7. Inspecting and Maintaining FULLTEXT Indexes
-------------------------------------------------
/*
7.1 List all FULLTEXT indexes in the current database.
*/
SELECT
TABLE_NAME,
INDEX_NAME,
COLUMN_NAME,
INDEX_TYPE
FROM INFORMATION_SCHEMA.STATISTICS
WHERE TABLE_SCHEMA = DATABASE()
AND INDEX_TYPE = 'FULLTEXT'
ORDER BY TABLE_NAME, INDEX_NAME, SEQ_IN_INDEX;
/*
7.2 Check FULLTEXT index configuration variables.
ft_min_word_len (MyISAM) / innodb_ft_min_token_size (InnoDB)
control the minimum token length (default 3).
Words shorter than this threshold are ignored.
*/
SHOW VARIABLES LIKE 'innodb_ft_%';
SHOW VARIABLES LIKE 'ft_%';
/*
7.3 After bulk inserts, you can manually sync the FULLTEXT index cache
to the on-disk auxiliary tables.
*/
-- OPTIMIZE TABLE documents; -- also defragments the table
/*
7.4 Query InnoDB auxiliary FULLTEXT tables (requires super or
innodb_ft_aux_table set to the target table).
*/
-- SET GLOBAL innodb_ft_aux_table = 'mysql_course/documents';
-- SELECT * FROM INFORMATION_SCHEMA.INNODB_FT_INDEX_CACHE LIMIT 20;
-- SELECT * FROM INFORMATION_SCHEMA.INNODB_FT_INDEX_TABLE LIMIT 20;
-------------------------------------------------
-- Region: 8. Security-Filtered Search
-------------------------------------------------
/*
8.1 Combine FULLTEXT relevance scoring with a conventional WHERE predicate
to enforce access control at the application layer.
*/
SELECT
document_id,
title,
ROUND(MATCH(title, body) AGAINST ('MySQL optimization'), 4) AS relevance_score
FROM documents
WHERE MATCH(title, body) AGAINST ('MySQL optimization')
AND security_level <= 1 -- only show public documents
ORDER BY relevance_score DESC;
-------------------------------------------------
-- Region: End of Script
-------------------------------------------------