主页 > 编程资料 > C# >
发布时间:2015-09-26 作者:网络 阅读:277次

/// <summary>
/// Recursively walks a directory tree and, for every file whose lower-cased extension is
/// accepted by <c>CheckFileType</c>, runs <c>IsUTF8</c> on its content. Files that fail the
/// check are read twice: once decoded as UTF-8 and once decoded with the system default
/// encoding (honouring a byte-order mark if one is present).
/// </summary>
/// <param name="Path">Directory to scan. Sub-directories are processed before the files
/// that sit directly in this directory.</param>
private void FindNoUTFFile(string Path)
{
    DirectoryInfo folder = new DirectoryInfo(Path);

    // Descend into every sub-directory first, then handle this directory's own files.
    foreach (DirectoryInfo subFolder in folder.GetDirectories())
    {
        FindNoUTFFile(subFolder.FullName);
    }

    foreach (FileInfo file in folder.GetFiles())
    {
        if (!CheckFileType(file.Extension.ToLower()))
        {
            continue;
        }

        // 'using' guarantees the handle is released even when IsUTF8 throws.
        bool bUtf8;
        using (FileStream fs = new FileStream(file.FullName, FileMode.Open, FileAccess.Read))
        {
            bUtf8 = IsUTF8(fs);
        }

        if (bUtf8)
        {
            continue;
        }

        // sb2: content decoded as UTF-8; sb: content decoded with the default encoding.
        // NOTE(review): neither buffer is consumed in the code visible here - presumably a
        // conversion or reporting step was meant to follow; confirm against the original.
        StringBuilder sb = new StringBuilder();
        StringBuilder sb2 = new StringBuilder();

        using (StreamReader reader = new StreamReader(file.FullName, System.Text.Encoding.UTF8))
        {
            sb2.Append(reader.ReadToEnd());
        }

        using (StreamReader reader = new StreamReader(file.FullName, System.Text.Encoding.Default, true))
        {
            sb.Append(reader.ReadToEnd());
        }
    }
}
 
   //0000 0000-0000 007F - 0xxxxxxx  (ASCII converts to 1 octet)
   //0000 0080-0000 07FF - 110xxxxx 10xxxxxx    (2 octet format)
   //0000 0800-0000 FFFF - 1110xxxx 10xxxxxx 10xxxxxx (3 octet format)
   //0001 0000-0010 FFFF - 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx (4 octet format)

   private static bool IsUTF8(FileStream sbInputStream)
   {
    int   i;
    byte cOctets;  // octets to go in this UTF-8 encoded character
    byte chr;
    bool  bAllAscii= true;
    long iLen = sbInputStream.Length;

    cOctets= 0;
    for( i=0; i < iLen; i++ )
    {
     chr = (byte)sbInputStream.ReadByte(,',',');

     if( (chr & 0x80) != 0 ) bAllAscii= false;

     if( cOctets == 0 ) 
     {
      if( chr >= 0x80 )
      { 
       do
       {
        chr <<= 1;
        cOctets++;
       }
       while( (chr & 0x80) != 0 ,',',');

       cOctets--;                       
       if( cOctets == 0 ) return false; 
      }
     }
     else
     {
      if( (chr & 0xC0) != 0x80 )
      {
       return false;
      }
      cOctets--;                      
     }
    }

    if( cOctets > 0 )
    { 
     return false;
    }

    if( bAllAscii )
    {   
     return false;
    }

    return true;

   }
  }
  
 
 }

 

关键字词: