Java 判断字符串是否为中文、日文

　　直接上代码：

import java.io.UnsupportedEncodingException;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * String helpers for rough script detection (Chinese / Japanese) and for
 * converting between text and its backslash-u escaped form.
 */
public class StringUtil {

    /** One run of characters removed by {@link #removePunctuation}: ASCII space, punctuation, ASCII digits. */
    private static final Pattern STRIPPED_CHARS = Pattern.compile("[ \\pP\\p{Punct}\\d]+");

    /** One or more characters from the CJK ideograph range U+4E00..U+9FA5. */
    private static final Pattern CHINESE_TEXT = Pattern.compile("[\\u4e00-\\u9fa5]+");

    /** A single hiragana, katakana or half-width katakana character. */
    private static final Pattern KANA = Pattern.compile("[\\u3040-\\u30ff\\uff66-\\uff9f]");

    /** Text made only of kana, kanji (U+4E00..U+9FFF) and the iteration mark U+3005. */
    private static final Pattern JAPANESE_TEXT =
            Pattern.compile("[\\u3005\\u3040-\\u30ff\\u4e00-\\u9fff\\uff66-\\uff9f]+");

    /** A backslash-u escape followed by one to four hexadecimal digits. */
    private static final Pattern UNICODE_ESCAPE = Pattern.compile("\\\\u([0-9a-fA-F]{1,4})");

    /**
     * Removes punctuation, ASCII spaces and ASCII digits from a string.
     *
     * @param input the text to clean; must not be {@code null}
     * @return {@code input} without any Unicode punctuation, ASCII punctuation/symbol,
     *         ASCII space or ASCII digit characters
     */
    public static String removePunctuation(String input) {
        // A single pass over the union of the three character sets gives the same
        // result as three consecutive replaceAll calls.
        return STRIPPED_CHARS.matcher(input).replaceAll("");
    }

    /**
     * Tells whether a string consists solely of Chinese ideographs.
     *
     * @param input the text to test; may be {@code null}
     * @return {@code true} if {@code input} is non-empty and every character lies in
     *         U+4E00..U+9FA5; {@code false} otherwise (including {@code null})
     */
    public static boolean isChinese(String input) {
        return input != null && CHINESE_TEXT.matcher(input).matches();
    }

    /**
     * Tells whether a string looks like Japanese text.
     *
     * <p>The text must be made only of kana and kanji and must contain at least one
     * kana character. Kanji-only text cannot be told apart from Chinese and is
     * therefore reported as not Japanese, as is the empty string.
     *
     * @param input the text to test (normally already passed through
     *              {@link #removePunctuation}); may be {@code null}
     * @return {@code true} if the text is kana/kanji only and contains kana
     */
    public static boolean isJapanese(String input) {
        if (input == null) {
            return false;
        }
        return JAPANESE_TEXT.matcher(input).matches() && KANA.matcher(input).find();
    }

    /**
     * Converts a string to its escaped form: every UTF-16 code unit becomes a
     * backslash, the letter u and exactly four lowercase hexadecimal digits.
     * No byte-order mark is emitted.
     *
     * @param input the text to escape; must not be {@code null}
     * @return the escaped text, six characters per input character
     * @throws UnsupportedEncodingException never thrown by this implementation; the
     *         clause is kept so that existing callers remain source-compatible
     */
    public static String stringToUnicode(String input) throws UnsupportedEncodingException {
        StringBuilder out = new StringBuilder(input.length() * 6);
        for (int i = 0; i < input.length(); i++) {
            String hex = Integer.toHexString(input.charAt(i));
            out.append("\\u");
            // Left-pad to four digits so that every escape has a fixed width.
            for (int pad = hex.length(); pad < 4; pad++) {
                out.append('0');
            }
            out.append(hex);
        }
        return out.toString();
    }

    /**
     * Decodes backslash-u escapes (one to four hex digits each) back into characters.
     * Any text that is not part of an escape is copied to the result unchanged.
     *
     * @param unicode the escaped text; must not be {@code null}
     * @return the decoded text
     */
    public static String unicodeToString(String unicode) {
        StringBuilder sb = new StringBuilder(unicode.length());
        Matcher escapes = UNICODE_ESCAPE.matcher(unicode);
        int copiedUpTo = 0;
        while (escapes.find()) {
            sb.append(unicode, copiedUpTo, escapes.start());
            sb.append((char) Integer.parseInt(escapes.group(1), 16));
            copiedUpTo = escapes.end();
        }
        sb.append(unicode, copiedUpTo, unicode.length());
        return sb.toString();
    }

    /**
     * Demo: prints Shift-JIS byte counts, escaped forms and detection results
     * for a few Chinese and Japanese sample strings.
     *
     * @param args unused
     * @throws UnsupportedEncodingException if the runtime has no Shift-JIS charset
     */
    public static void main(String[] args) throws UnsupportedEncodingException {
        String ss = "中华人民共和国";
        String ss1 = "おはよう";

        System.out.println(ss + " shift-jis编码字符数：" + ss.getBytes("shift-jis").length);
        System.out.println(ss + " 2倍字符数：" + ss.length() * 2);
        System.out.println(ss1 + "shift-jis编码字符数：" + ss1.getBytes("shift-jis").length);
        System.out.println(ss1 + " 2倍字符数：" + ss1.length() * 2);

        System.out.println(ss + " 字码点：" + stringToUnicode(ss));
        System.out.println(ss1 + " 字码点：" + stringToUnicode(ss1));
        System.out.println("\\u4e00 -\\u9fa5" + "对应的中文是：" + unicodeToString("\\u4e00") + " - " + unicodeToString("\\u9fa5"));

        String s = "中华人民共和国，成立了~~~";
        String s1 = "1个和尚挑水喝， 2个和尚抬水喝， 3个和尚没呀没水喝";
        String s2 = "あなたのお父さんとお母さんは大阪に行って、あなたのおじいさんとお婆さんはみんな東京に行って、あなたの弟の妹は北海道に行きました。";
        String s3 = "1お、 2は、 3よ、 4う,呵呵";

        System.out.println("开始测试：");
        System.out.println(s + (isChinese(removePunctuation(s)) ? "是" : "不是") + "中文");
        System.out.println(s1 + (isChinese(removePunctuation(s1)) ? "是" : "不是") + "中文");
        System.out.println(s2 + (isJapanese(removePunctuation(s2)) ? "是" : "不是") + "日文");
        System.out.println(s3 + (isJapanese(removePunctuation(s3)) ? "是" : "不是") + "日文");
    }

}

　　输出：

中华人民共和国 shift-jis编码字符数：13

中华人民共和国 2倍字符数：14

おはようshift-jis编码字符数：8

おはよう 2倍字符数：8

中华人民共和国 字码点：\ufeff\u4e2d\u534e\u4eba\u6c11\u5171\u548c\u56fd

おはよう 字码点：\ufeff\u304a\u306f\u3088\u3046

\u4e00 -\u9fa5对应的中文是：一 - 龥

开始测试：

中华人民共和国，成立了~~~是中文

1个和尚挑水喝， 2个和尚抬水喝， 3个和尚没呀没水喝是中文

あなたのお父さんとお母さんは大阪に行って、あなたのおじいさんとお婆さんはみんな東京に行って、あなたの弟の妹は北海道に行きました。是日文

1お、 2は、 3よ、 4う,呵呵是日文

巴特西

java判断字符串是否中文、日文

最新文章

热门文章