GBase新闻
“G”术时刻 | 在GBase数据库中,如何使用图片进行信息检索
数据检索是SQL中最基础、最常用的操作之一,能够帮助用户从数据库中提取所需的数据。如今,随着AI以及数据库技术的不断发展,数据检索方式已经由传统的单一依托文本检索发展到能够通过声音、图像、视频等多模态技术实现信息的跨模态检索。本文将演示如何使用图像在南大通用GBase 8a数据库内检索数据信息,帮助大家更好地理解和使用这一功能。
部署方式
(1) 在Gbase8a数据库中新建表,并导入数据(包含图片信息),如下例
datafile文件(图片文件按指定目录存放):
1,gou1,./gou1.jpg,小强
2,gou2,./gou2.jpg,旺财
3,mao1,./mao1.jpg,花花
4,mao2,./mao2.jpg,球球
CREATE TABLE "zhao"(
"id" int(11) DEFAULT NULL,
"ename" varchar(50) DEFAULT NULL,
"image_data" longblob, --最大可存储64MB信息
"cname" varchar(50) DEFAULT NULL
)
Load data infile 'file://192.168.1.5//home/gbase/comparepic/datafile.txt' into table test.zhao fields terminated by ',' table_fields 'id,ename,image_data type_url,cname'
--图片在库内以二进制形式存储
(2)新建目录edp,将app.py放入,其内新建templates 和uploads 目录,将index.html文件放入templates 内
(3)执行python.py app
(4)弹出server运行窗口后最小化
使用方式
(1)在浏览器内输入:127.0.0.1:5050
(2)在页面内选择库名,表名,以及包含图像字段的字段名
(3)选择要用来进行检索的图片
(4)选择与该图片匹配程度的值。该值越接近0,匹配程度越低, 越接近1,匹配程度越高
(5)点击“使用图片检索”按钮
演示样例
启动server窗口
库名、 表名、字段名(含图片的), 用来检索的图片文件,都选择好。点击使用图片检索,下方会显示出查询出的记录信息。
参考文件
(1)app.py文件
import osfrom flask import Flask, render_template, request, send_from_directory, jsonifyimport pymysqlfrom io import BytesIOfrom PIL import Imageimport torchimport torchvision.models as modelsimport torchvision.transforms as transformsimport numpy as npfrom werkzeug.utils import secure_filenameimport base64app = Flask(__name__) DB_CONFIG = { 'host': '192.168.1.5', 'user': 'gbase', 'password': 'gbase20110531', 'charset': 'utf8mb4', 'port': 5258} UPLOAD_FOLDER = 'uploads'app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER if not os.path.exists(UPLOAD_FOLDER): os.makedirs(UPLOAD_FOLDER) model = models.resnet50(pretrained=True)model.eval() transform = transforms.Compose([ transforms.Resize((224, 224)), transforms.ToTensor(), transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])])@app.route('/', methods=['GET', 'POST'])def index(): if request.method == 'POST': dbname = request.form.get('dbname') tbname = request.form.get('tbname') column_name = request.form.get('column_name') sample_file = request.files['sample'] similarity_threshold = float(request.form.get('similarity_threshold')) if sample_file: filename = secure_filename(sample_file.filename) sample_path = os.path.join(app.config['UPLOAD_FOLDER'], filename) sample_file.save(sample_path) sample_image = Image.open(sample_path) sample_image = transform(sample_image).unsqueeze(0) with torch.no_grad(): sample_features = model(sample_image).squeeze().numpy() conn = pymysql.connect(**DB_CONFIG) cursor = conn.cursor() query = f"SELECT * FROM {dbname}.{tbname}" cursor.execute(query) results = cursor.fetchall() matching_records = [] for row in results: image_index = [col[0] for col in cursor.description].index(column_name) image_data = row[image_index] if image_data: candidate_image = Image.open(BytesIO(image_data)) candidate_image = transform(candidate_image).unsqueeze(0) with torch.no_grad(): candidate_features = model(candidate_image).squeeze().numpy() similarity = np.dot(sample_features, candidate_features) / ( np.linalg.norm(sample_features) * np.linalg.norm(candidate_features)) if similarity >= similarity_threshold: encoded_image = base64.b64encode(image_data).decode('utf-8') new_row = list(row) new_row[image_index] = encoded_image matching_records.append(new_row) cursor.close() conn.close() return render_template('index.html', dbnames=get_dbnames(), tbnames=get_tbnames(dbname), column_names=get_column_names(dbname, tbname), sample_image=filename, matching_records=matching_records, columns=[col[0] for col in cursor.description], column_name=column_name) dbnames = get_dbnames() return render_template('index.html', dbnames=dbnames, tbnames=[], column_names=[])def get_dbnames(): conn = pymysql.connect(**DB_CONFIG) cursor = conn.cursor() query = "SELECT distinct dbname FROM gbase.table_distribution" cursor.execute(query) dbnames = [row[0] for row in cursor.fetchall()] cursor.close() conn.close() return dbnamesdef get_tbnames(dbname): conn = pymysql.connect(**DB_CONFIG) cursor = conn.cursor() query = f"SELECT tbname FROM gbase.table_distribution WHERE dbname = '{dbname}'" cursor.execute(query) tbnames = [row[0] for row in cursor.fetchall()] cursor.close() conn.close() return tbnamesdef get_column_names(dbname, tbname): conn = pymysql.connect(**DB_CONFIG) cursor = conn.cursor() query = "SELECT COLUMN_NAME FROM information_schema.COLUMNS WHERE TABLE_SCHEMA = %s AND TABLE_NAME = %s" cursor.execute(query, (dbname, tbname)) column_names = [row[0] for row in cursor.fetchall()] cursor.close() conn.close() return column_names@app.route('/uploads/<filename>')def uploaded_file(filename): return send_from_directory(app.config['UPLOAD_FOLDER'], filename)@app.route('/get_tables')def get_tables(): dbname = request.args.get('dbname') tbnames = get_tbnames(dbname) return jsonify({'tbnames': tbnames})@app.route('/get_columns')def get_columns(): dbname = request.args.get('dbname') tbname = request.args.get('tbname') column_names = get_column_names(dbname, tbname) return jsonify({'column_names': column_names})if __name__ == '__main__': app.run(debug=True)
(2)index.html文件
<!DOCTYPE html><html lang="en"><head> <meta charset="UTF-8"> <meta name="viewport" content="width=device-width, initial-scale=1.0"> <title>图像检索DEMO</title> <script src="https://unpkg.com/@tailwindcss/browser@4"></script> <link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.7.2/css/all.min.css" rel="stylesheet"></head><body class="bg-gray-100 font-sans"> <div class="container mx-auto p-8"> <h1 class="text-3xl font-bold text-center mb-8">图像检索应用</h1> <form method="post" enctype="multipart/form-data" class="bg-white p-6 rounded shadow-md"> <table class="w-full"> <tbody> <tr> <td class="py-2 pr-4"> <label for="dbname" class="block text-sm font-medium text-gray-700">选择库名:</label> </td> <td class="py-2"> <select name="dbname" id="dbname" onchange="updateTables()" class="mt-1 block w-full py-2 px-3 border border-gray-300 bg-white rounded-md shadow-sm focus:outline-none focus:ring-blue-500 focus:border-blue-500 sm:text-sm"> {% for dbname in dbnames %} <option value="{{ dbname }}">{{ dbname }}</option> {% endfor %} </select> </td> </tr> <tr> <td class="py-2 pr-4"> <label for="tbname" class="block text-sm font-medium text-gray-700">选择表名:</label> </td> <td class="py-2"> <select name="tbname" id="tbname" onchange="updateColumns()" class="mt-1 block w-full py-2 px-3 border border-gray-300 bg-white rounded-md shadow-sm focus:outline-none focus:ring-blue-500 focus:border-blue-500 sm:text-sm"> <!-- 初始为空,后续动态填充 --> </select> </td> </tr> <tr> <td class="py-2 pr-4"> <label for="column_name" class="block text-sm font-medium text-gray-700">选择字段名:</label> </td> <td class="py-2"> <select name="column_name" id="column_name" class="mt-1 block w-full py-2 px-3 border border-gray-300 bg-white rounded-md shadow-sm focus:outline-none focus:ring-blue-500 focus:border-blue-500 sm:text-sm"> <!-- 初始为空,后续动态填充 --> </select> </td> </tr> <tr> <td class="py-2 pr-4"> <label for="sample" class="block text-sm font-medium text-gray-700">选择样本图片:</label> </td> <td class="py-2"> <input type="file" name="sample" id="sample" class="mt-1 block w-full py-2 px-3 border border-gray-300 bg-white rounded-md shadow-sm focus:outline-none focus:ring-blue-500 focus:border-blue-500 sm:text-sm"> {% if sample_image %} <img src="{{ url_for('uploaded_file', filename=sample_image) }}" alt="样本图片" width="200" class="mt-2"> {% endif %} </td> </tr> <tr> <td class="py-2 pr-4"> <label for="similarity_threshold" class="block text-sm font-medium text-gray-700">输入匹配度:</label> </td> <td class="py-2"> <input type="number" name="similarity_threshold" id="similarity_threshold" step="0.01" value="0.8" class="mt-1 block w-full py-2 px-3 border border-gray-300 bg-white rounded-md shadow-sm focus:outline-none focus:ring-blue-500 focus:border-blue-500 sm:text-sm"> </td> </tr> <tr> <td colspan="2" class="py-4"> <input type="submit" value="使用图片检索" class="bg-blue-500 hover:bg-blue-600 text-white py-2 px-4 rounded-md shadow-md focus:outline-none focus:ring-2 focus:ring-blue-500 focus:ring-opacity-50"> </td> </tr> </tbody> </table> </form> {% if matching_records %} <h2 class="text-2xl font-bold mt-8 mb-4">匹配结果:</h2> <table class="w-full bg-white rounded shadow-md"> <thead> <tr> {% for column in columns %} <th class="py-3 px-6 text-left text-sm font-medium text-gray-700 border-b border-gray-200"> {{ column }}</th> {% endfor %} </tr> </thead> <tbody> {% for record in matching_records %} <tr> {% for value in record %} {% set i = loop.index0 %} {# 获取当前循环的索引,从0开始 #} {% if columns[i] == column_name %} <td class="py-4 px-6 border-b border-gray-200"><img src="data:image/jpeg;base64,{{ value }}" alt="图片" width="100"></td> {% else %} <td class="py-4 px-6 border-b border-gray-200">{{ value }}</td> {% endif %} {% endfor %} </tr> {% endfor %} </tbody> </table> {% endif %} </div> <script> function updateTables() { var dbname = document.getElementById('dbname').value; var xhr = new XMLHttpRequest(); xhr.open('GET', `/get_tables?dbname=${dbname}`, true); xhr.onreadystatechange = function () { if (xhr.readyState === 4 && xhr.status === 200) { var response = JSON.parse(xhr.responseText); var tbnames = response.tbnames; var tbnameSelect = document.getElementById('tbname'); tbnameSelect.innerHTML = ''; tbnames.forEach(function (tbname) { var option = document.createElement('option'); option.value = tbname; option.textContent = tbname; tbnameSelect.appendChild(option); }); // 初始化列名选择框 var columnSelect = document.getElementById('column_name'); columnSelect.innerHTML = ''; } }; xhr.send(); } function updateColumns() { var dbname = document.getElementById('dbname').value; var tbname = document.getElementById('tbname').value; var xhr = new XMLHttpRequest(); xhr.open('GET', `/get_columns?dbname=${dbname}&tbname=${tbname}`, true); xhr.onreadystatechange = function () { if (xhr.readyState === 4) { if (xhr.status === 200) { try { var response = JSON.parse(xhr.responseText); var column_names = response.column_names; var columnSelect = document.getElementById('column_name'); columnSelect.innerHTML = ''; column_names.forEach(function (column_name) { var option = document.createElement('option'); option.value = column_name; option.textContent = column_name; columnSelect.appendChild(option); }); } catch (error) { console.error('解析 JSON 失败:', error); } } else { console.error('请求失败,状态码:', xhr.status); } } }; xhr.send(); } </script></body></html>
希望本文能够帮助大家更好地理解和使用GBase 8a数据库的图片检索功能。如果您想进一步了解和体验GBase数据库,请随时联系我们或访问GBase社区(www.gbase.cn)。