1 module filesizes.filesize;
2 
3 import std.math : pow;
4 
/**
 * Describes a single filesize unit: how many bytes one unit is worth, and the
 * short and long labels used when printing or parsing it.
 */
struct SizeDenomination {
    /// Number of bytes contained in exactly one unit of this denomination.
    const(ulong) sizeFactor;

    /// Short label for the denomination, e.g. "KB" or "KiB".
    const(string) abbreviation;

    /// Full singular name of the denomination, e.g. "Kilobyte".
    const(string) name;
}
25 
/**
 * Enumeration of the supported filesize denominations.
 *
 * The first group uses metric (power-of-1000) factors, the second group uses
 * IEC (power-of-1024) factors.
 */
enum Size : SizeDenomination {
    // The bases are written as `ulong` literals on purpose: `pow` on two `int`
    // operands computes in `int`, so 1000^4 and 1024^4 would overflow 32 bits
    // before being widened to the `ulong` size factor.
    BYTES = SizeDenomination(1UL, "B", "Byte"),
    KILOBYTES = SizeDenomination(1000UL, "KB", "Kilobyte"),
    MEGABYTES = SizeDenomination(pow(1000UL, 2), "MB", "Megabyte"),
    GIGABYTES = SizeDenomination(pow(1000UL, 3), "GB", "Gigabyte"),
    TERABYTES = SizeDenomination(pow(1000UL, 4), "TB", "Terabyte"),

    KIBIBYTES = SizeDenomination(1024UL, "KiB", "Kibibyte"),
    MEBIBYTES = SizeDenomination(pow(1024UL, 2), "MiB", "Mebibyte"),
    GIBIBYTES = SizeDenomination(pow(1024UL, 3), "GiB", "Gibibyte"),
    TEBIBYTES = SizeDenomination(pow(1024UL, 4), "TiB", "Tebibyte")
}
41 
/**
 * Exception that's thrown when a filesize, or one of its components, cannot
 * be parsed from a string.
 */
public class FilesizeParseException : Exception {
    /**
     * Constructs the exception.
     * Params:
     *   msg = Description of what could not be parsed.
     *   file = Source file of the throw site; defaults to the caller's file.
     *   line = Source line of the throw site; defaults to the caller's line.
     *   next = Optional earlier exception that this one was caused by.
     */
    public this(string msg, string file = __FILE__, size_t line = __LINE__, Throwable next = null) pure nothrow @safe {
        // Forward the location so the exception reports where it was thrown,
        // rather than the location of this constructor.
        super(msg, file, line, next);
    }
}
51 
/**
 * Parses a filesize from a string that's formatted like so:
 * "<number> <unit>", where the unit is any one of the defined Size types
 * (abbreviation or full name, case-insensitive). The whitespace between the
 * number and the unit is optional.
 *
 * Accepts floating-point and integer numbers. The first match found anywhere
 * in the string is used, and any fractional number of bytes in the result is
 * dropped (for example, "0.25 bytes" yields 0).
 * Params:
 *   s = The string to parse.
 * Returns: The parsed filesize, in bytes.
 * Throws: FilesizeParseException if no filesize can be found in the string,
 * or if the resulting number of bytes cannot be represented as a `ulong`.
 */
public ulong parseFilesize(string s) {
    import std.regex;
    import std.conv : to, ConvException;
    import std.uni : toLower;
    auto r = ctRegex!(`(\d*\.\d+|\d+)\s*(kilobyte|kibibyte|megabyte|mebibyte|gigabyte|gibibyte|terabyte|tebibyte|byte|kb|kib|mb|mib|gb|gib|tb|tib|b)`);
    auto c = matchFirst(s.toLower, r);
    if (c.empty) throw new FilesizeParseException("Could not parse file size.");
    try {
        double num = c[1].to!double;
        Size size = parseDenomination(c[2]);
        return (num * size.sizeFactor).to!ulong;
    } catch (ConvException e) {
        // A value too large for `ulong` would otherwise leak out as a
        // std.conv exception; report it as a parse failure instead.
        throw new FilesizeParseException("Could not parse file size: " ~ e.msg);
    }
}
71 
unittest {
    // Whole numbers with abbreviated units.
    assert(parseFilesize("1 b") == 1);
    assert(parseFilesize("2 kb") == 2000);
    assert(parseFilesize("1 kib") == 1024);

    // Fractional amounts; a fraction of a single byte truncates to zero.
    assert(parseFilesize("0.5 mb") == Size.MEGABYTES.sizeFactor / 2);
    assert(parseFilesize("0.25 bytes") == 0);
    assert(parseFilesize("0.125 tib") == Size.TEBIBYTES.sizeFactor / 8);

    // Missing whitespace, full unit names, plurals and upper case.
    assert(parseFilesize("25gb") == Size.GIGABYTES.sizeFactor * 25);
    assert(parseFilesize("3 gibibytes") == Size.GIBIBYTES.sizeFactor * 3);
    assert(parseFilesize("1024 bytes") == 1024);
    assert(parseFilesize("2MB") == Size.MEGABYTES.sizeFactor * 2);

    // Input without a recognisable filesize must be rejected.
    foreach (invalidInput; ["not a filesize", ""]) {
        bool rejected = false;
        try {
            parseFilesize(invalidInput);
        } catch (FilesizeParseException) {
            rejected = true;
        }
        assert(rejected);
    }
}
92 
/**
 * Parses a filesize unit denomination from a string, such as "bytes",
 * "kilobyte", "KiB", or "tebibytes".
 *
 * Matching is case-insensitive and ignores surrounding whitespace. The input
 * must be exactly a denomination's abbreviation, its full name, or its full
 * name followed by "s"; merely starting with one of these is not enough, so
 * strings such as "bits" or "kbps" are rejected.
 * Params:
 *   s = The string to parse.
 * Returns: The matching denomination.
 * Throws: FilesizeParseException if the string does not name a denomination.
 */
public Size parseDenomination(string s) {
    import std.uni : toLower;
    import std.string : strip;
    import std.traits : EnumMembers;
    s = s.toLower.strip;
    foreach (denom; EnumMembers!Size) {
        const abbreviation = denom.abbreviation.toLower;
        const name = denom.name.toLower;
        // Whole-string comparison: prefix matching would let any word that
        // begins with "b" be accepted as bytes.
        if (s == abbreviation || s == name || s == (name ~ "s")) return denom;
    }
    throw new FilesizeParseException("Could not parse file size denomination.");
}
111 
unittest {
    import std.traits : EnumMembers;

    // Every denomination must be recognised by its abbreviation, its full
    // name, and the plural of its full name.
    foreach (denom; EnumMembers!Size) {
        assert(parseDenomination(denom.abbreviation) == denom);
        assert(parseDenomination(denom.name) == denom);
        assert(parseDenomination(denom.name ~ "s") == denom);
    }

    // Strings that do not name a denomination must be rejected.
    foreach (invalidInput; ["not a denomination", ""]) {
        bool rejected = false;
        try {
            parseDenomination(invalidInput);
        } catch (FilesizeParseException) {
            rejected = true;
        }
        assert(rejected);
    }
}
128 
/**
 * Formats a byte count as a human-readable string expressed in the given
 * denomination, in the form "<number> <unit>".
 *
 * When the full unit name is used, an "s" is appended to it unless the
 * converted number is exactly 1.
 * Params:
 *   formatString = The format string for formatting the size number; a
 *                  space and the unit are appended after it.
 *   byteSize = The total number of bytes.
 *   size = The denomination to use when representing the size.
 *   useAbbreviation = Whether to use the abbreviated unit name, or the full.
 * Returns: A string representation of the file size.
 */
public string formatFilesize(string formatString, ulong byteSize, SizeDenomination size, bool useAbbreviation = true) {
    import std.string : format;

    // The byte count re-expressed in units of the requested denomination.
    double quantity = cast(double) byteSize / size.sizeFactor;

    string unitLabel = size.abbreviation;
    if (!useAbbreviation) {
        unitLabel = size.name;
        // Pluralise the full name for anything other than exactly one unit.
        if (quantity != 1.0) {
            unitLabel ~= 's';
        }
    }
    return format(formatString ~ " %s", quantity, unitLabel);
}
147 
unittest {
    // Abbreviated unit labels.
    assert(formatFilesize("%.0f", 42, Size.BYTES) == "42 B", "abbreviated bytes");
    assert(formatFilesize("%.0f", 2000, Size.KILOBYTES) == "2 KB", "abbreviated kilobytes");

    // Full unit names are pluralised unless the amount is exactly one.
    assert(formatFilesize("%.0f", 42, Size.BYTES, false) == "42 Bytes", "plural full name");
    assert(formatFilesize("%.0f", 1, Size.BYTES, false) == "1 Byte", "singular full name");

    // Fractional amounts follow the supplied number format.
    assert(formatFilesize("%.1f", 512, Size.KIBIBYTES) == "0.5 KiB", "one decimal place");
    assert(formatFilesize("%.2f", 256, Size.KIBIBYTES) == "0.25 KiB", "two decimal places");
}
156 
/**
 * Formats a byte count as a human-readable string, automatically choosing
 * the denomination via getAppropriateSize.
 * Params:
 *   formatString = The format string for formatting the size number.
 *   byteSize = The total number of bytes.
 *   useAbbreviation = Whether to use the abbreviated unit name, or the full.
 *   useMetric = Whether to use metric or IEC style units.
 * Returns: A string representation of the file size.
 */
public string formatFilesize(string formatString, ulong byteSize, bool useAbbreviation = true, bool useMetric = true) {
    Size bestFit = getAppropriateSize(byteSize, useMetric);
    return formatFilesize(formatString, byteSize, bestFit, useAbbreviation);
}
160 
/**
 * Formats a byte count as a human-readable string with one decimal place,
 * automatically choosing the denomination.
 * Params:
 *   byteSize = The total number of bytes.
 *   useAbbreviation = Whether to use the abbreviated unit name, or the full.
 *   useMetric = Whether to use metric or IEC style units.
 * Returns: A string representation of the file size.
 */
public string formatFilesize(ulong byteSize, bool useAbbreviation = true, bool useMetric = true) {
    // One decimal place is the default number format.
    string defaultNumberFormat = "%.1f";
    return formatFilesize(defaultNumberFormat, byteSize, useAbbreviation, useMetric);
}
164 
/**
 * Picks the denomination best suited to describing the given byte size.
 *
 * Denominations of the chosen unit system are tried from smallest to largest,
 * and the first one in which the size is below one step of that system (1000
 * for metric, 1024 for IEC) is returned. If none qualifies, the largest
 * denomination of the system is returned.
 * Params:
 *   byteSize = The number of bytes.
 *   useMetric = Whether to use metric or IEC style units.
 * Returns: The best size to describe the given byte size.
 */
public Size getAppropriateSize(ulong byteSize, bool useMetric = true) {
    // Multiplier separating consecutive denominations of the chosen system.
    const double unitStep = useMetric ? 1000 : 1024;

    // Candidates, ordered from smallest to largest.
    Size[] candidates = useMetric
        ? [Size.BYTES, Size.KILOBYTES, Size.MEGABYTES, Size.GIGABYTES, Size.TERABYTES]
        : [Size.BYTES, Size.KIBIBYTES, Size.MEBIBYTES, Size.GIBIBYTES, Size.TEBIBYTES];

    foreach (candidate; candidates) {
        const double scaled = cast(double) byteSize / candidate.sizeFactor;
        if (scaled < unitStep) return candidate;
    }

    // Too large for every candidate: fall back to the largest one.
    return candidates[$ - 1];
}
191 
unittest {
    // Small values, including zero, stay in bytes.
    assert(getAppropriateSize(0) == Size.BYTES, "zero bytes");
    assert(getAppropriateSize(42) == Size.BYTES, "a few bytes");

    // The same byte count maps to the matching unit of either system.
    assert(getAppropriateSize(2048) == Size.KILOBYTES, "metric kilobytes");
    assert(getAppropriateSize(2048, false) == Size.KIBIBYTES, "IEC kibibytes");

    // Progressively larger metric denominations.
    assert(getAppropriateSize(1_000_000) == Size.MEGABYTES, "megabytes");
    assert(getAppropriateSize(3_000_000_000) == Size.GIGABYTES, "gigabytes");
    assert(getAppropriateSize(4_000_000_000_000) == Size.TERABYTES, "terabytes");
}