PostgreSQL字符串函数
本章将详细介绍PostgreSQL中的字符串函数,包括字符串连接、大小写转换、子字符串提取、模式匹配等,帮助您高效处理文本数据。
字符串基本操作
字符串连接
多种方式连接字符串:
SELECT
'Hello' || ' ' || 'World' AS concat_operator, -- 使用||操作符
CONCAT('Hello', ' ', 'World') AS concat_function, -- 使用CONCAT函数
CONCAT_WS(' ', 'Hello', 'Beautiful', 'World') AS concat_ws_function; -- 使用分隔符连接
字符串长度
获取字符串长度:
SELECT
LENGTH('Hello World') AS char_length, -- 字符长度
CHAR_LENGTH('Hello World') AS char_length_alt, -- 同LENGTH
OCTET_LENGTH('Hello World') AS byte_length; -- 字节长度
大小写转换
转换字符串大小写:
SELECT
UPPER('Hello World') AS uppercase, -- 转大写
LOWER('Hello World') AS lowercase, -- 转小写
INITCAP('hello world') AS initcap; -- 首字母大写
字符串提取和定位
子字符串提取
从字符串中提取子串:
SELECT
SUBSTRING('Hello World' FROM 1 FOR 5) AS substring_pos, -- 从位置1开始取5个字符
SUBSTRING('Hello World' FROM 'Hello') AS substring_pattern, -- 使用正则表达式
LEFT('Hello World', 5) AS left_chars, -- 从左边取5个字符
RIGHT('Hello World', 5) AS right_chars; -- 从右边取5个字符
字符串定位
查找字符或子串的位置:
SELECT
POSITION('World' IN 'Hello World') AS position_result, -- 查找位置
STRPOS('Hello World', 'World') AS strpos_result; -- 同POSITION
字符串修改
字符串替换
替换字符串中的字符或子串:
SELECT
REPLACE('Hello World', 'World', 'PostgreSQL') AS replace_result, -- 替换子串
TRANSLATE('Hello', 'el', 'ip') AS translate_result; -- 字符映射替换
字符串填充
在字符串两侧填充字符:
SELECT
LPAD('Hello', 10, '*') AS left_padded, -- 左侧填充到10个字符
RPAD('Hello', 10, '*') AS right_padded, -- 右侧填充到10个字符
字符串修剪
去除字符串首尾的字符:
SELECT
TRIM(' Hello World ') AS trim_spaces, -- 去除首尾空格
TRIM(LEADING 'x' FROM 'xxxHelloxxx') AS trim_leading, -- 去除开头的x
TRIM(TRAILING 'x' FROM 'xxxHelloxxx') AS trim_trailing, -- 去除结尾的x
TRIM(BOTH 'x' FROM 'xxxHelloxxx') AS trim_both, -- 去除首尾的x
BTRIM(' Hello World ') AS btrim_result, -- 同TRIM
LTRIM(' Hello World ') AS ltrim_result, -- 去除开头空格
RTRIM(' Hello World ') AS rtrim_result; -- 去除结尾空格
模式匹配
LIKE操作符
使用LIKE进行简单模式匹配:
SELECT * FROM users
WHERE username LIKE 'john%'; -- 以john开头的用户名
SELECT * FROM users
WHERE email LIKE '%@gmail.com'; -- Gmail邮箱
SELECT * FROM products
WHERE name LIKE 'A%B'; -- 以A开头B结尾的产品名
正则表达式匹配
使用正则表达式进行复杂模式匹配:
SELECT
'Hello World' ~ 'Hello' AS regex_match, -- 匹配返回true
'Hello World' ~* 'hello' AS regex_match_case_insensitive, -- 不区分大小写匹配
'Hello World' !~ 'xyz' AS regex_not_match, -- 不匹配返回true
SUBSTRING('Hello World' FROM 'H.*o') AS regex_extract; -- 正则表达式提取
正则表达式替换
使用正则表达式替换文本:
SELECT
REGEXP_REPLACE('Hello World 123', '[0-9]+', 'NUM') AS regex_replace, -- 替换数字
REGEXP_REPLACE('John Doe', '(\w+)\s+(\w+)', '\2, \1') AS regex_rearrange; -- 重排姓名
字符串分割
分割字符串
将字符串分割为数组:
SELECT
STRING_TO_ARRAY('apple,banana,orange', ',') AS string_array, -- 分割为数组
UNNEST(STRING_TO_ARRAY('apple,banana,orange', ',')) AS unnested_array; -- 展开数组
数组转字符串
将数组连接为字符串:
SELECT
ARRAY_TO_STRING(ARRAY['apple', 'banana', 'orange'], ', ') AS array_string; -- 数组转字符串
特殊字符处理
重复字符串
重复字符串指定次数:
SELECT
REPEAT('Hello', 3) AS repeated_string; -- 重复3次
反转字符串
将字符串反转:
SELECT
REVERSE('Hello World') AS reversed_string; -- 反转字符串
ASCII和字符转换
ASCII码和字符之间的转换:
SELECT
ASCII('A') AS ascii_value, -- 获取字符ASCII码
CHR(65) AS char_from_ascii; -- 根据ASCII码获取字符
实际应用场景
数据清洗
-- 清洗用户数据
UPDATE users
SET
username = TRIM(LOWER(username)), -- 去除空格并转小写
email = LOWER(TRIM(email)), -- 邮箱转小写并去空格
phone = REGEXP_REPLACE(phone, '[^0-9]', '', 'g'); -- 只保留数字
格式化输出
-- 格式化姓名显示
SELECT
CONCAT(UPPER(LEFT(first_name, 1)), LOWER(SUBSTRING(first_name, 2))) || ' ' ||
UPPER(last_name) AS formatted_name
FROM users;
-- 格式化电话号码
SELECT
CONCAT(
'(', SUBSTRING(phone, 1, 3), ') ',
SUBSTRING(phone, 4, 3), '-',
SUBSTRING(phone, 7, 4)
) AS formatted_phone
FROM users;
搜索和过滤
-- 全文搜索
SELECT * FROM articles
WHERE LOWER(title) LIKE '%postgresql%'
OR LOWER(content) LIKE '%postgresql%';
-- 使用正则表达式搜索邮箱格式
SELECT * FROM users
WHERE email ~* '^[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}$';
数据验证
-- 验证数据格式
SELECT
username,
email,
CASE
WHEN LENGTH(username) < 3 THEN 'Username too short'
WHEN email !~* '^[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}$' THEN 'Invalid email'
ELSE 'Valid'
END AS validation_result
FROM users;
性能优化
索引优化
为字符串操作创建合适的索引:
-- 为前缀匹配创建索引
CREATE INDEX idx_users_username_prefix ON users (username varchar_pattern_ops);
-- 为大小写不敏感查询创建索引
CREATE INDEX idx_users_email_lower ON users (LOWER(email));
-- 为正则表达式创建索引(部分索引)
CREATE INDEX idx_users_email_gmail ON users (email) WHERE email LIKE '%@gmail.com';
函数索引
为常用字符串函数创建索引:
-- 为字符串长度创建索引
CREATE INDEX idx_products_name_length ON products (LENGTH(name));
-- 为子字符串创建索引
CREATE INDEX idx_users_username_prefix3 ON users (SUBSTRING(username, 1, 3));
综合示例
-- 创建用户报告函数
CREATE OR REPLACE FUNCTION generate_user_report()
RETURNS TABLE(
user_category TEXT,
user_count BIGINT,
avg_username_length NUMERIC,
most_common_domain TEXT
) AS $$
BEGIN
RETURN QUERY
WITH domain_stats AS (
SELECT
SUBSTRING(email FROM POSITION('@' IN email) + 1) AS domain,
COUNT(*) AS domain_count
FROM users
WHERE email IS NOT NULL
GROUP BY domain
ORDER BY domain_count DESC
LIMIT 1
)
SELECT
CASE
WHEN LENGTH(username) < 5 THEN 'Short'
WHEN LENGTH(username) BETWEEN 5 AND 10 THEN 'Medium'
ELSE 'Long'
END AS user_category,
COUNT(*) AS user_count,
ROUND(AVG(LENGTH(username)), 2) AS avg_username_length,
(SELECT domain FROM domain_stats) AS most_common_domain
FROM users
WHERE username IS NOT NULL
GROUP BY
CASE
WHEN LENGTH(username) < 5 THEN 'Short'
WHEN LENGTH(username) BETWEEN 5 AND 10 THEN 'Medium'
ELSE 'Long'
END
ORDER BY user_count DESC;
END;
$$ LANGUAGE plpgsql;
-- 使用函数生成报告
SELECT * FROM generate_user_report();
提示:在处理大量文本数据时,合理使用字符串函数和索引可以显著提高查询性能。对于复杂的文本处理需求,考虑使用正则表达式或全文搜索功能。