Login

Davider · (This post was last modified: 07-21-2022, 09:38 PM by Davider.)

The QM code looks more concise and easier to write. My C# programming skills are not very good. :)

I looked up some examples and got the code below

In QM, how can I get the code page of a text file's encoding?

I also found some C code that detects the code page (shown further below).

Macro Macro12

Code: Copy      Help
_s.getfile("$desktop$\Test.txt") ;;load Test.txt; its text is assumed to be GB2312 (code page 936)

;TODO: detect the code page of the text instead of hard-coding 936 below

_s.ConvertEncoding(936 65001) ;;convert GB2312 (936) to UTF-8 (65001)

_s.findreplace("测试" "正式") ;;replace "测试" with "正式"

_s.ConvertEncoding(65001 936) ;;convert UTF-8 (65001) back to GB2312 (936)

_s.setfile("$desktop$\Test_ok.txt") ;;save the result as Test_ok.txt

C code for detecting the code page:

Code:

Copy Help

 

#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>



// Forward declarations of the two encoding checks defined later in this file.
// Each takes a NUL-terminated byte string and returns true when the bytes are
// consistent with the named encoding.
bool is_str_utf8(const char* str);

bool is_str_gbk(const char* str);



// Check whether a NUL-terminated byte string is well-formed UTF-8 (RFC 3629).
//
// str: the bytes to examine; NULL is treated as "not UTF-8".
//
// Returns true for the empty string, for pure ASCII, and for any string in
// which every multibyte sequence is a valid 2-, 3- or 4-byte UTF-8 sequence.
// Returns false for:
//   - stray continuation bytes (0x80..0xBF) where a lead byte is expected,
//   - lead bytes 0xC0/0xC1 (overlong forms) and 0xF5..0xFF (beyond U+10FFFF,
//     including the obsolete 5/6-byte forms and the never-valid 0xFE/0xFF),
//   - overlong 3/4-byte forms, UTF-16 surrogates (U+D800..U+DFFF),
//   - sequences truncated by the end of the string.
bool is_str_utf8(const char* str)
{
    if (str == NULL) {
        return false;
    }

    const unsigned char *p = (const unsigned char *)str;

    while (*p != '\0') {
        unsigned char lead = *p++;
        unsigned int trailing;
        // Allowed range for the NEXT continuation byte. Only the first
        // continuation byte is ever narrowed; the rest are always 0x80..0xBF.
        unsigned char lo = 0x80;
        unsigned char hi = 0xBF;

        if (lead < 0x80) {
            continue; // plain ASCII, single byte
        } else if (lead >= 0xC2 && lead <= 0xDF) {
            trailing = 1;
        } else if (lead >= 0xE0 && lead <= 0xEF) {
            trailing = 2;
            if (lead == 0xE0) {
                lo = 0xA0; // below this the code point fits in 2 bytes (overlong)
            } else if (lead == 0xED) {
                hi = 0x9F; // above this is the surrogate range U+D800..U+DFFF
            }
        } else if (lead >= 0xF0 && lead <= 0xF4) {
            trailing = 3;
            if (lead == 0xF0) {
                lo = 0x90; // below this the code point fits in 3 bytes (overlong)
            } else if (lead == 0xF4) {
                hi = 0x8F; // above this exceeds U+10FFFF
            }
        } else {
            // 0x80..0xBF (continuation as lead), 0xC0/0xC1, 0xF5..0xFF
            return false;
        }

        for (unsigned int k = 0; k < trailing; ++k) {
            unsigned char c = *p;
            // The terminator (0x00) is always below `lo`, so a truncated
            // sequence is rejected here without reading past the string end.
            if (c < lo || c > hi) {
                return false;
            }
            ++p;
            lo = 0x80;
            hi = 0xBF;
        }
    }

    return true;
}



// Check whether a NUL-terminated byte string is consistent with GBK
// (Windows code page 936). GB2312 double-byte text (bytes 0xA1..0xFE) lies
// inside the ranges accepted here, so it passes as well.
//
// str: the bytes to examine; NULL is treated as "not GBK".
//
// Accepted byte patterns:
//   - single bytes 0x00..0x7F (ASCII),
//   - a lead byte 0x81..0xFE followed by a trail byte 0x40..0xFE, except 0x7F.
// Returns false for lead bytes 0x80 and 0xFF, for an out-of-range trail byte,
// and for a lead byte that is cut off by the end of the string.
//
// Note: this is a structural check only. Many UTF-8 strings also satisfy
// these ranges, so a true result does not prove the text is GBK.
bool is_str_gbk(const char* str)
{
    if (str == NULL) {
        return false;
    }

    const unsigned char *p = (const unsigned char *)str;

    while (*p != '\0') {
        unsigned char lead = *p++;

        if (lead < 0x80) {
            continue; // ASCII, single byte
        }
        if (lead < 0x81 || lead > 0xFE) {
            return false; // 0x80 and 0xFF are not accepted as lead bytes
        }

        unsigned char trail = *p;
        // The terminator (0x00) is below 0x40, so a dangling lead byte is
        // rejected here without reading past the end of the string.
        // 0x7F is excluded from the GBK trail-byte range.
        if (trail < 0x40 || trail > 0xFE || trail == 0x7F) {
            return false;
        }
        ++p;
    }

    return true;
}



// Read a text file token by token, echo each token, then report whether the
// file's contents look like UTF-8 and/or GBK.
//
// file_name: path of the file to read; NULL is ignored.
//
// Output (stdout): every whitespace-delimited token on its own line, followed
// by two lines holding 1 or 0 - first the is_str_utf8 verdict, then the
// is_str_gbk verdict. A verdict is 1 only if EVERY token passed that check,
// so a file whose last word happens to be ASCII is not misreported. An empty
// file yields 1 and 1.
// If the file cannot be opened, the reason is written to stderr via perror
// and nothing is printed to stdout.
//
// Limitation: tokens longer than 1023 bytes are read in 1023-byte pieces; a
// piece boundary can fall inside a multibyte character and make a check fail.
void read_text(const char* file_name)
{
    char line[1024];
    bool all_utf8 = true;
    bool all_gbk = true;

    if (file_name == NULL) {
        return;
    }

    // "r" is the standard text-mode string; "rt" is a non-standard extension.
    FILE *file = fopen(file_name, "r");
    if (!file) {
        perror(file_name);
        return;
    }

    // The field width keeps fscanf from writing past the end of `line`
    // (1023 characters plus the terminating NUL).
    while (fscanf(file, "%1023s", line) == 1) {
        printf("%s\n", line);
        if (!is_str_utf8(line)) {
            all_utf8 = false;
        }
        if (!is_str_gbk(line)) {
            all_gbk = false;
        }
    }

    printf("%d\n", all_utf8);
    printf("%d\n", all_gbk);

    fclose(file);
}



// Program entry point: runs the encoding report on "test.txt", resolved
// against the current working directory, and always exits with status 0.
int main(void)
{
    const char *input_path = "test.txt";

    read_text(input_path);
    return 0;
}

Login
Username:
Password:	Lost Password?
	Remember me