您的位置:时时app平台注册网站 > 编程知识 > C#如何自动识别文件的编码

C#如何自动识别文件的编码

2019-11-21 02:19
/***
 * @pName caffe_task_pool_demo
 * @name CC
 * @user wadezh
 * @date 2018/6/16
 * @desc
 */
using System;
using System.Collections;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Runtime.InteropServices;
using System.Text;
using System.Threading.Tasks;

namespace caffe_task_pool_demo
{
    /// <summary>
    /// Static wrapper around the native <c>classification_dll.dll</c> task-pool API,
    /// plus a greedy decoder that turns the network output into a captcha string.
    /// </summary>
    /// <remarks>
    /// NOTE(review): every native pointer (TaskPool*, BlobData*) is marshalled as a 32-bit
    /// <c>int</c>. That only round-trips a pointer in a 32-bit process; if the DLL is ever
    /// built for x64 these signatures need <c>IntPtr</c>. The signatures are left untouched
    /// here so existing callers keep compiling.
    /// </remarks>
    class CC
    {
        /// <summary>Handle returned by <see cref="CreateTaskPoolByData"/>; 0 until <see cref="InitCaptcha"/> assigns it.</summary>
        public static int taskPool { get; set; } = 0;

        /// <summary>Text of the deploy prototxt, decoded with <c>Encoding.Default</c> by <see cref="InitCaptcha"/>.</summary>
        public static string prototxt { get; set; }

        /// <summary>Non-empty lines of the label map file, in file order.</summary>
        public static ArrayList map { get; set; }

        /// <summary>Value parsed from the <c>time_step:</c> entry of the prototxt.</summary>
        public static int timeStep { get; set; }

        /// <summary>Value parsed from the <c>num_output:</c> entry that follows <c>inner_product_param {</c> in the prototxt.</summary>
        public static int alphabetSize { get; set; }

        /*Caffe_API TaskPool* __stdcall createTaskPoolByData(
            const void* prototxt_data,
            int prototxt_data_length,
            const void* caffemodel_data,
            int caffemodel_data_length,
            float scale_raw = 1,
            const char* mean_file = 0,
            int num_means = 0,
            float* means = 0,
            int gpu_id = -1,
            int batch_size = 3);*/

        // NOTE(review): `means` is a float* in the native declaration above but is passed
        // here as a float by value; only a zero value looks safe — confirm before passing anything else.
        [DllImport("classification_dll.dll", EntryPoint = "createTaskPoolByData", CallingConvention = CallingConvention.StdCall)]
        public static extern int CreateTaskPoolByData(byte[] prototxt_data,
        int prototxt_data_length,
        byte[] caffemodel_data,
        int caffemodel_data_length,
        float scale_raw = 1,
        string mean_file = "",
        int num_means = 0,
        float means = 0,
        int gpu_id = -1,
        int cach_size = 1);


        /*Caffe_API BlobData* __stdcall forwardByTaskPool(TaskPool* pool, const void* img, int len, const char* blob_name);*/

        [DllImport("classification_dll.dll", EntryPoint = "forwardByTaskPool", CallingConvention = CallingConvention.StdCall)]
        public static extern int ForwardByTaskPool(int poolHandle, byte[] image, int imageLen, string printBlobName);

        /*Caffe_API int __stdcall getBlobLength(BlobData* feature);*/
        [DllImport("classification_dll.dll", EntryPoint = "getBlobLength", CallingConvention = CallingConvention.StdCall)]
        public static extern int GetBlobLength(int feature);

        /*Caffe_API void __stdcall cpyBlobData(void* buffer, BlobData* feature);*/
        // The native function is declared void; the int return value carries no meaning.
        [DllImport("classification_dll.dll", EntryPoint = "cpyBlobData", CallingConvention = CallingConvention.StdCall)]
        public static extern int CpyBlobData(float[] buffer, int feature);

        /*Caffe_API void  __stdcall releaseBlobData(BlobData* ptr);*/
        // The native function is declared void; the int return value carries no meaning.
        [DllImport("classification_dll.dll", EntryPoint = "releaseBlobData", CallingConvention = CallingConvention.StdCall)]
        public static extern int ReleaseBlobData(int ptr);

        /// <summary>
        /// Finds the largest value in <paramref name="arr"/> within [<paramref name="begin"/>, <paramref name="end"/>).
        /// </summary>
        /// <param name="arr">Values to scan.</param>
        /// <param name="begin">First index to inspect (inclusive).</param>
        /// <param name="end">Index to stop at (exclusive).</param>
        /// <param name="acc">Receives the largest value found.</param>
        /// <returns>Offset of the first maximum relative to <paramref name="begin"/>; 0 when nothing compares greater.</returns>
        private static int Argmax(float[] arr, int begin, int end, ref float acc)
        {
            // Start from the window's first slot and -infinity so the returned offset can
            // never be negative, even when every value is very small or NaN.
            acc = float.NegativeInfinity;
            int mxInd = begin;
            for (int i = begin; i < end; i++)
            {
                if (acc < arr[i])
                {
                    mxInd = i;
                    acc = arr[i];
                }
            }
            return mxInd - begin;
        }


        /// <summary>
        /// Loads the network definition, the weights and the label map, creates the native
        /// task pool and caches the decoding parameters read from the prototxt.
        /// </summary>
        /// <param name="prototxtPath">Path of the deploy prototxt file.</param>
        /// <param name="modelPath">Path of the caffemodel file.</param>
        /// <param name="mapPath">Path of the UTF-8 label map file (one label per line).</param>
        /// <param name="gpuId">Forwarded to the native <c>gpu_id</c> argument.</param>
        /// <param name="batchSize">Forwarded to the last native argument (<c>batch_size</c> in the C declaration).</param>
        /// <returns><c>true</c> when the native call returned a non-zero pool handle.</returns>
        /// <exception cref="FormatException"><c>time_step</c> or <c>num_output</c> could not be located or parsed in the prototxt.</exception>
        public static bool InitCaptcha(string prototxtPath, string modelPath, string mapPath, int gpuId, int batchSize)
        {
            byte[] deploy = Util.GetFileStream(prototxtPath);
            byte[] model = Util.GetFileStream(modelPath);
            CC.taskPool = CC.CreateTaskPoolByData(deploy, deploy.Length, model, model.Length, 1F, "", 0, 0F, gpuId, batchSize);
            CC.prototxt = System.Text.Encoding.Default.GetString(deploy);

            // Split on either line-ending character and drop blank lines.
            string[] labels = Util.LoadStringFromFile(mapPath)
                .Trim()
                .Split(new[] { '\r', '\n' }, StringSplitOptions.RemoveEmptyEntries);
            CC.map = new ArrayList(labels);

            // "\n" as the right delimiter works for both CRLF and LF files; the stray '\r'
            // (if any) is removed by Trim() before parsing.
            string time_step = Util.GetMiddleString(CC.prototxt, "time_step:", "\n");
            string layer = Util.GetMiddleString(CC.prototxt, "inner_product_param {", "{");
            string alphabet_size = Util.GetMiddleString(layer, "num_output:", "\n");
            CC.timeStep = int.Parse(time_step.Trim());
            CC.alphabetSize = int.Parse(alphabet_size.Trim());
            return CC.taskPool != 0;
        }


        /// <summary>
        /// Runs one image through the task pool and decodes the "premuted_fc" blob into text.
        /// </summary>
        /// <param name="image">Encoded image bytes handed to the native library.</param>
        /// <returns>The decoded text with "_" removed and trimmed; empty when no output was produced.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="image"/> is <c>null</c>.</exception>
        public static string GetCaptcha(byte[] image)
        {
            if (image == null)
            {
                throw new ArgumentNullException(nameof(image));
            }

            // Handle of the forward-pass result blob.
            int blobHandle = CC.ForwardByTaskPool(taskPool, image, image.Length, "premuted_fc");
            if (blobHandle == 0)
            {
                // Presumably a zero handle means the forward pass failed; never hand a
                // null pointer back to the native library.
                return string.Empty;
            }

            try
            {
                // Copy the tensor into managed memory.
                float[] permute_fc = new float[CC.GetBlobLength(blobHandle)];
                CpyBlobData(permute_fc, blobHandle);
                if (permute_fc.Length == 0)
                {
                    return string.Empty;
                }

                StringBuilder code = new StringBuilder();
                float acc = 0F;
                int emptyLabel = alphabetSize - 1;
                int prev = emptyLabel;

                // NOTE(review): the window starts at slot 1 of each step and the loop stops one
                // step early, so label 0 and the final time step are never examined, and
                // `o` is compared with emptyLabel before the +1 applied for the map lookup.
                // This mirrors the published code; verify against the label map layout
                // before changing it.
                for (int i = 1; i < timeStep; i++)
                {
                    int o = Argmax(permute_fc, (i - 1) * alphabetSize + 1, i * alphabetSize, ref acc);
                    if (o != emptyLabel && prev != o)
                    {
                        code.Append(map[o + 1]);
                    }
                    prev = o;
                }
                return code.ToString().Replace("_", "").Trim();
            }
            finally
            {
                // Always give the blob back, even when decoding throws.
                ReleaseBlobData(blobHandle);
            }
        }

        /// <summary>File and string helpers used by <see cref="CC"/>.</summary>
        protected class Util
        {
            /// <summary>Reads the whole file at <paramref name="path"/> into memory.</summary>
            /// <param name="path">File to read.</param>
            /// <returns>The complete file content.</returns>
            public static byte[] GetFileStream(string path)
            {
                // File.ReadAllBytes opens read-only, loops until every byte is read and
                // closes the handle on every path.
                return File.ReadAllBytes(path);
            }


            /// <summary>Reads the whole text file, decoding it as UTF-8.</summary>
            /// <param name="fileName">File to read.</param>
            /// <returns>The file content as a string.</returns>
            public static string LoadStringFromFile(string fileName)
            {
                return File.ReadAllText(fileName, System.Text.Encoding.UTF8);
            }


            /// <summary>
            /// Returns the text between the first occurrence of <paramref name="LeftString"/> and
            /// the next occurrence of <paramref name="RightString"/> after it.
            /// </summary>
            /// <param name="SumString">Text to search.</param>
            /// <param name="LeftString">Marker preceding the wanted text.</param>
            /// <param name="RightString">Marker following the wanted text.</param>
            /// <returns>The enclosed text, or "" when any argument is null/empty or a marker is missing.</returns>
            public static string GetMiddleString(string SumString, string LeftString, string RightString)
            {
                if (string.IsNullOrEmpty(SumString)) return "";
                if (string.IsNullOrEmpty(LeftString)) return "";
                if (string.IsNullOrEmpty(RightString)) return "";

                // Ordinal search: culture-sensitive IndexOf can fail to find "\n" inside
                // "\r\n" on ICU-based runtimes.
                int LeftIndex = SumString.IndexOf(LeftString, StringComparison.Ordinal);
                if (LeftIndex == -1) return "";
                LeftIndex = LeftIndex + LeftString.Length;
                int RightIndex = SumString.IndexOf(RightString, LeftIndex, StringComparison.Ordinal);
                if (RightIndex == -1) return "";
                return SumString.Substring(LeftIndex, RightIndex - LeftIndex);
            }

        }

    }

}

下面话不多说,上代码。

python版实现的地址:

 

总结

 

我封装的这个CC类只支持GPU任务池识别,识别速度比较快

C#中识别文件的编码是一个头疼的问题,最近在做导入微信商户后台退款数据时,无论怎么设置编码导出来都是乱码,后来在网上找了这个识别文件编码的代码,感觉不错。最后识别出来是gb2312,看来我还是太渣了,只能吃土了,竟然忘记了这个编码。

由于我不是很懂python语法,也是直接去的w3c看的教程,为此还掉进了一个坑..详见:

图片 1

/// <summary>
/// Guesses the text encoding of a file or stream from its byte-order mark (BOM) and,
/// for the <see cref="GetEncoding(Stream)"/> overload, from the shape of its bytes.
/// </summary>
public class TxtFileEncoder
{
    public TxtFileEncoder()
    {
    }

    /// <summary>
    /// Detects the encoding of a text file from its BOM; returns <c>Encoding.Default</c>
    /// when no BOM is present.
    /// </summary>
    /// <param name="fileName">Path of the file to inspect.</param>
    /// <returns>The detected encoding, or <c>Encoding.Default</c>.</returns>
    public static Encoding GetEncoding(string fileName)
    {
        return GetEncoding(fileName, Encoding.Default);
    }

    /// <summary>
    /// Detects the encoding of an open file stream from its BOM; returns
    /// <c>Encoding.Default</c> when no BOM is present.
    /// </summary>
    /// <param name="stream">Open file stream; its position is preserved.</param>
    /// <returns>The detected encoding, or <c>Encoding.Default</c>.</returns>
    public static Encoding GetEncoding(FileStream stream)
    {
        return GetEncoding(stream, Encoding.Default);
    }

    /// <summary>
    /// Detects the encoding of a text file from its BOM.
    /// </summary>
    /// <param name="fileName">Path of the file to inspect.</param>
    /// <param name="defaultEncoding">Returned when the file has no recognisable BOM.</param>
    /// <returns>The detected encoding, or <paramref name="defaultEncoding"/>.</returns>
    public static Encoding GetEncoding(string fileName, Encoding defaultEncoding)
    {
        // Read-only access so read-only files work; `using` closes the handle even when
        // detection throws.
        using (FileStream fs = new FileStream(fileName, FileMode.Open, FileAccess.Read, FileShare.Read))
        {
            return GetEncoding(fs, defaultEncoding);
        }
    }

    /// <summary>
    /// Detects the encoding of an open file stream from its BOM.
    /// </summary>
    /// <param name="stream">Open, seekable file stream. Its position is restored before returning.</param>
    /// <param name="defaultEncoding">Returned when the stream is null, shorter than two bytes, or has no recognisable BOM.</param>
    /// <returns>The detected encoding, or <paramref name="defaultEncoding"/>.</returns>
    public static Encoding GetEncoding(FileStream stream, Encoding defaultEncoding)
    {
        if (stream == null || stream.Length < 2)
        {
            return defaultEncoding;
        }

        // Remember where the caller was. (Seek(0, Begin) returns the NEW position, so it
        // cannot be used to capture the old one.)
        long origPos = stream.Position;
        try
        {
            stream.Seek(0, SeekOrigin.Begin);
            byte[] head = new byte[4];
            int count = ReadUpTo(stream, head);
            return DetectBom(head, count) ?? defaultEncoding;
        }
        finally
        {
            stream.Seek(origPos, SeekOrigin.Begin);
        }
    }

    /// <summary>
    /// Detects the encoding of a stream, also recognising UTF-8 text that has no BOM.
    /// </summary>
    /// <remarks>
    /// The stream is read from its current position to the end and is closed before
    /// this method returns.
    /// </remarks>
    /// <param name="fs">Readable stream positioned at the start of the text.</param>
    /// <returns>
    /// The encoding announced by the BOM; otherwise <c>Encoding.UTF8</c> when every byte
    /// sequence is well-formed UTF-8 (plain ASCII included); otherwise <c>Encoding.Default</c>.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="fs"/> is <c>null</c>.</exception>
    public static System.Text.Encoding GetEncoding(Stream fs)
    {
        if (fs == null)
        {
            throw new ArgumentNullException("fs");
        }

        try
        {
            byte[] data;
            using (MemoryStream buffer = new MemoryStream())
            {
                fs.CopyTo(buffer);
                data = buffer.ToArray();
            }

            Encoding fromBom = DetectBom(data, data.Length);
            if (fromBom != null)
            {
                return fromBom;
            }

            // Validate the whole content from its first byte so the check never starts
            // in the middle of a multi-byte character.
            return IsUTF8Bytes(data) ? Encoding.UTF8 : Encoding.Default;
        }
        finally
        {
            // Closing the caller's stream is part of this overload's established behaviour.
            fs.Close();
        }
    }

    /// <summary>Fills <paramref name="buffer"/> from the stream until it is full or the stream ends.</summary>
    /// <returns>Number of bytes actually read.</returns>
    private static int ReadUpTo(Stream stream, byte[] buffer)
    {
        int total = 0;
        while (total < buffer.Length)
        {
            int read = stream.Read(buffer, total, buffer.Length - total);
            if (read <= 0)
            {
                break;
            }
            total += read;
        }
        return total;
    }

    /// <summary>
    /// Maps a leading byte-order mark to its encoding.
    /// </summary>
    /// <param name="head">Buffer holding the first bytes of the text.</param>
    /// <param name="count">Number of valid bytes in <paramref name="head"/>.</param>
    /// <returns>The matching encoding, or <c>null</c> when no BOM is recognised.</returns>
    private static Encoding DetectBom(byte[] head, int count)
    {
        // The UTF-32 marks are tested first because FF FE 00 00 also begins with the
        // UTF-16 little-endian mark FF FE.
        if (count >= 4 && head[0] == 0xFF && head[1] == 0xFE && head[2] == 0x00 && head[3] == 0x00)
        {
            return Encoding.UTF32;
        }
        if (count >= 4 && head[0] == 0x00 && head[1] == 0x00 && head[2] == 0xFE && head[3] == 0xFF)
        {
            return new UTF32Encoding(true, true);
        }
        if (count >= 3 && head[0] == 0xEF && head[1] == 0xBB && head[2] == 0xBF)
        {
            return Encoding.UTF8;
        }
        if (count >= 2 && head[0] == 0xFE && head[1] == 0xFF)
        {
            return Encoding.BigEndianUnicode;
        }
        if (count >= 2 && head[0] == 0xFF && head[1] == 0xFE)
        {
            return Encoding.Unicode;
        }
        return null;
    }

    /// <summary>
    /// Checks whether the bytes form structurally valid UTF-8 (without relying on a BOM).
    /// </summary>
    /// <param name="data">Bytes to validate.</param>
    /// <returns>
    /// <c>true</c> when every lead byte is followed by the right number of continuation
    /// bytes; <c>false</c> otherwise, including when the data ends inside a character.
    /// </returns>
    private static bool IsUTF8Bytes(byte[] data)
    {
        int pending = 0; // continuation bytes still expected for the current character
        for (int i = 0; i < data.Length; i++)
        {
            int current = data[i];
            if (pending == 0)
            {
                if (current < 0x80)
                {
                    continue; // single-byte (ASCII) character
                }

                // Count the leading 1 bits of the lead byte: 110xxxxx -> 2, 1110xxxx -> 3, ...
                int leadingOnes = 0;
                for (int mask = 0x80; mask != 0 && (current & mask) != 0; mask >>= 1)
                {
                    leadingOnes++;
                }

                // A single leading 1 is a continuation byte in lead position; more than
                // six is not a lead byte in any form of UTF-8.
                if (leadingOnes == 1 || leadingOnes > 6)
                {
                    return false;
                }
                pending = leadingOnes - 1;
            }
            else
            {
                // Continuation bytes must look like 10xxxxxx.
                if ((current & 0xC0) != 0x80)
                {
                    return false;
                }
                pending--;
            }
        }

        // Data that stops in the middle of a character is not valid UTF-8.
        return pending == 0;
    }
}

链接: 密码:ct5z

前言

由于我对图像处理这里不是很在行,python也是临时看的,故这种细化提取骨架的算法也就是直接移植原代码,并没做什么优化之类..代码很粗糙..

一开始我在网上找demo没有找到,在群里寻求帮助也没有得到结果,索性将网上的易语言模块反编译之后,提取出对应的dll以及代码,然后对照官方的C++代码,写出了下面的C#版本

您可能感兴趣的文章:

  • asp.C#实现图片文件与base64string编码解码
  • C#实现获取文本文件的编码的一个类(区分GB2312和UTF8)
  • C#读写指定编码格式的文本文件
  • c#检测文本文件编码的方法
  • c# Base64编码和图片的互相转换代码
  • c#字符串编码编码(encoding)使用方法示例
  • C#中字符串编码处理
  • JS与C#编码解码
  • C# Base64编码函数
  • C#解码base64编码二进制数据的方法
package com.ocr.imgocr;

import java.awt.Color;
import java.awt.image.BufferedImage;
import java.io.File;
import java.io.IOException;

import javax.imageio.ImageIO;

public class Thin {
    //索引数组
    private static Integer[] array = {0,0,1,1,0,0,1,1,1,1,0,1,1,1,0,1,
             1,1,0,0,1,1,1,1,0,0,0,0,0,0,0,1,
             0,0,1,1,0,0,1,1,1,1,0,1,1,1,0,1,
             1,1,0,0,1,1,1,1,0,0,0,0,0,0,0,1,
             1,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,
             0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
             1,1,0,0,1,1,0,0,1,1,0,1,1,1,0,1,
             0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
             0,0,1,1,0,0,1,1,1,1,0,1,1,1,0,1,
             1,1,0,0,1,1,1,1,0,0,0,0,0,0,0,1,
             0,0,1,1,0,0,1,1,1,1,0,1,1,1,0,1,
             1,1,0,0,1,1,1,1,0,0,0,0,0,0,0,0,
             1,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,
             1,1,0,0,1,1,1,1,0,0,0,0,0,0,0,0,
             1,1,0,0,1,1,0,0,1,1,0,1,1,1,0,0,
             1,1,0,0,1,1,1,0,1,1,0,0,1,0,0,0};

    public static boolean isWhite(int colorInt) {  
        Color color = new Color(colorInt);  
        if (color.getRed()   color.getGreen()   color.getBlue() > 400) {  
            return true;  
        }  
        return false;  
    } 

    public static BufferedImage VThin(BufferedImage image,Integer[] array){
        int h = image.getHeight();
        int w = image.getWidth();
        int NEXT = 1;
        for(int i=0;i<h;i  ){
            for(int j=0;j<w;j  ){
                if (NEXT == 0){
                    NEXT = 1;
                }else{
                    int M ;
                    if( 0<j&&j<w-1){
                        if(isBlack(image.getRGB(j-1,i))&&isBlack(image.getRGB(j,i))&&isBlack(image.getRGB(j 1,i))){
                            M=0;
                        }else{
                            M=1;
                        }
                    }else {
                        M = 1;
                    }
                    if(isBlack(image.getRGB(j,i))&&M!=0){      
                        int[] a = {0,0,0,0,0,0,0,0,0};
                        for(int k=0;k<3;k  ){
                            for(int l=0;l<3;l  ){
                                if ((-1<(i-1 k)&&(i-1 k)<h) && (-1<(j-1 l)&&(j-1 l)<w) && isWhite(image.getRGB(j-1 l,i-1 k))){
                                    a[k*3 l] = 1;
                                }
                            }
                        }
                        int sum = a[0]*1 a[1]*2 a[2]*4 a[3]*8 a[5]*16 a[6]*32 a[7]*64 a[8]*128;
                        if(array[sum]==0){
                            image.setRGB(j, i, Color.black.getRGB());
                        }else{
                            image.setRGB(j, i, Color.white.getRGB());
                        }
                        if (array[sum] == 1){
                            NEXT = 0;
                        }
                    }
                }
            }
        }
        return image;
    }

    public static BufferedImage HThin(BufferedImage image,Integer[] array){
        int h = image.getHeight();
        int w = image.getWidth();
        int NEXT = 1;
        for(int j=0;j<w;j  ){ 
            for(int i=0;i<h;i  ){
                if (NEXT == 0){
                    NEXT = 1;
                }else{
                    int M;
                    if(0<i&&i<h-1){
                        if(isBlack(image.getRGB(j,i-1))&&isBlack(image.getRGB(j,i))&&isBlack(image.getRGB(j,i 1))){
                            M=0;
                        }else{
                            M=1;
                        }
                    }else{
                        M = 1;
                    }
                    if (isBlack(image.getRGB(j,i)) && M != 0){                  
                        int[] a = {0,0,0,0,0,0,0,0,0};
                        for(int k=0;k<3;k  ){
                            for(int l=0;l<3;l  ){
                                if ((-1<(i-1 k)&&(i-1 k)<h) && (-1<(j-1 l)&&(j-1 l)<w )&& isWhite(image.getRGB(j-1 l,i-1 k))){
                                    a[k*3 l] = 1;
                                }
                            }
                        }
                        int sum = a[0]*1 a[1]*2 a[2]*4 a[3]*8 a[5]*16 a[6]*32 a[7]*64 a[8]*128;
                        if(array[sum]==0){
                            image.setRGB(j, i, Color.black.getRGB());
                        }else{
                            image.setRGB(j, i, Color.white.getRGB());
                        }
                        if (array[sum] == 1){
                            NEXT = 0;
                        }
                    }
                }
            }
        }
        return image;
    }

    public static BufferedImage Xihua(BufferedImage image,Integer[] array){
        int num=10;
        BufferedImage iXihua = image;
        for(int i=0;i<num;i  ){
            VThin(iXihua,array);
            HThin(iXihua,array);
        }
        return iXihua;
    }

    public static BufferedImage Two(BufferedImage image){
        int w = image.getWidth();
        int h = image.getHeight();
        BufferedImage iTwo = image;
        for(int i=0;i<h;i  ){
            for(int j=0;j<w;j  ){
                if(isBlack(image.getRGB(j,i))){
                    iTwo.setRGB(j, i, Color.BLACK.getRGB());
                }else{
                    iTwo.setRGB(j, i, Color.WHITE.getRGB());
                }
            }
        }
        return iTwo;
    }

    public static boolean isBlack(int colorInt) {
        Color color = new Color(colorInt);
        if (color.getRed()   color.getGreen()   color.getBlue() <= 400) {
            return true;
        }
        return false;
    }

    public static void main(String[] args) {
        try {
            //原始图片路径
            BufferedImage image = ImageIO.read(new File("image" File.separator "0.jpg"));
            //二值化
            BufferedImage iTwo = Two(image);
            ImageIO.write(iTwo, "jpg", new File("image" File.separator "two.jpg"));
            //细化
            BufferedImage iThin = Xihua(image,array);
            ImageIO.write(iThin, "jpg", new File("image" File.separator "thin.jpg"));

        } catch (IOException e) {
            e.printStackTrace();
        }

    }

}

 

以上就是C#自动识别文件编码的全部内容了,希望本文的内容对大家的学习或者工作能带来一定的帮助,如果有疑问大家可以留言交流。

项目中我已经将caffemodel以及prototxt等文件都打包,可以直接运行

近期研究验证码识别,也就看了一些图像识别的资料,其中一种字体细化提取骨架的算法网上没有java版的实现,所以就选取了一个python实现版本进行java代码的改写..

本文由时时app平台注册网站发布于编程知识,转载请注明出处:C#如何自动识别文件的编码

关键词:

  • 上一篇:没有了
  • 下一篇:没有了