-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathGet-FileEncoding.ps1
84 lines (67 loc) · 2.27 KB
/
Get-FileEncoding.ps1
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
##############################################################################
##
## Get-FileEncoding
##
## From Windows PowerShell Cookbook (O'Reilly)
## by Lee Holmes (http://www.leeholmes.com/guide)
##
##############################################################################
<#
.SYNOPSIS
Gets the encoding of a file
.EXAMPLE
Get-FileEncoding.ps1 .\UnicodeScript.ps1
BodyName : unicodeFFFE
EncodingName : Unicode (Big-Endian)
HeaderName : unicodeFFFE
WebName : unicodeFFFE
WindowsCodePage : 1200
IsBrowserDisplay : False
IsBrowserSave : False
IsMailNewsDisplay : False
IsMailNewsSave : False
IsSingleByte : False
EncoderFallback : System.Text.EncoderReplacementFallback
DecoderFallback : System.Text.DecoderReplacementFallback
IsReadOnly : True
CodePage : 1201
#>
param(
    ## Path to the file whose encoding should be reported.
    $Path
)

Set-StrictMode -Version Latest

## Lookup table keyed by an encoding's preamble bytes (joined with '-') whose
## value is the name of the matching static property on System.Text.Encoding.
## For example, "255-254" maps to "Unicode".
$encodings = @{}

## Enumerate every static property that System.Text.Encoding exposes, ask
## each encoding for its preamble (the marker bytes it writes at the start of
## a file), and record the preamble-to-name mapping. Encodings without a
## preamble all share the empty-string key.
$staticProperties = [System.Text.Encoding] |
    Get-Member -Static -MemberType Property

foreach($staticProperty in $staticProperties)
{
    $propertyName = $staticProperty.Name
    $preamble = [System.Text.Encoding]::($propertyName).GetPreamble()
    $encodings[$preamble -join '-'] = $propertyName
}

## Collect the byte length of every non-empty preamble in the table.
$encodingLengths = foreach($preambleKey in $encodings.Keys)
{
    if($preambleKey)
    {
        ($preambleKey -split "-").Count
    }
}
## Assume the encoding is UTF7 by default. This is the value reported when
## the start of the file does not match any known preamble.
$result = "UTF7"

## Candidate preamble lengths, longest first, so that a longer preamble
## (for example UTF32's 255-254-0-0) is tested before a shorter one that it
## begins with (Unicode's 255-254). Duplicate lengths are dropped because
## testing the same prefix twice cannot change the outcome.
$preambleLengths = @($encodingLengths | Sort-Object -Descending -Unique)

## Read the leading bytes of the file exactly once, instead of re-reading the
## file for every candidate length. A .NET stream is used rather than
## 'Get-Content -Encoding Byte' because that parameter value is not available
## in PowerShell 6 and later, and because indexing Get-Content's chunked
## output returns a single byte (not a chunk) when the file is no longer than
## the chunk size.
$header = [byte[]] @()
if($preambleLengths.Count -gt 0)
{
    ## .NET APIs do not follow the PowerShell location, so resolve the
    ## path to a full provider path first.
    $resolvedPath = (Resolve-Path -Path $Path -ErrorAction Stop |
        Select-Object -First 1).ProviderPath

    ## Open read-only and allow other readers / writers, so that files
    ## currently in use elsewhere can still be inspected.
    $stream = [System.IO.File]::Open(
        $resolvedPath,
        [System.IO.FileMode]::Open,
        [System.IO.FileAccess]::Read,
        [System.IO.FileShare]::ReadWrite)
    try
    {
        $buffer = New-Object byte[] $preambleLengths[0]
        $bytesRead = 0

        ## Stream.Read may return fewer bytes than requested, so keep
        ## reading until the buffer is full or the file ends.
        while($bytesRead -lt $buffer.Length)
        {
            $count = $stream.Read(
                $buffer, $bytesRead, $buffer.Length - $bytesRead)
            if($count -le 0)
            {
                break
            }
            $bytesRead += $count
        }

        if($bytesRead -gt 0)
        {
            $header = [byte[]] $buffer[0..($bytesRead - 1)]
        }
    }
    finally
    {
        $stream.Dispose()
    }
}

## Go through each of the possible preamble lengths, take that many
## bytes from the start of the file, and then see if they match one of the
## encodings we know about.
foreach($encodingLength in $preambleLengths)
{
    ## A file shorter than this preamble cannot start with it.
    if($header.Length -lt $encodingLength)
    {
        continue
    }

    $bytes = $header[0..($encodingLength - 1)]
    $encoding = $encodings[$bytes -join '-']

    ## If we found an encoding that had the same preamble bytes,
    ## save that output and break.
    if($encoding)
    {
        $result = $encoding
        break
    }
}

## Finally, output the encoding.
[System.Text.Encoding]::$result