diff --git a/.gitignore b/.gitignore index 99e0528fc..5c05ee308 100644 --- a/.gitignore +++ b/.gitignore @@ -9,3 +9,4 @@ src/*.dmp src/NUL src/nvr/ src/roms/ +/.vs diff --git a/src/disk/minivhd/CREDITS.md b/src/disk/minivhd/CREDITS.md new file mode 100644 index 000000000..c494d4e43 --- /dev/null +++ b/src/disk/minivhd/CREDITS.md @@ -0,0 +1,12 @@ +# Credits +MiniVHD Copyright (c) 2019 Sherman Perry + +MiniVHD was made possible with the help of the following projects + +### libxml2 +**Project Home:** http://www.xmlsoft.org/ +**License:** MIT (see src/libxml2_encoding.c for details) + +### cwalk +**Project Home:** https://likle.github.io/cwalk/ +**License:** MIT (https://github.com/likle/cwalk/blob/master/LICENSE.md) diff --git a/src/disk/minivhd/LICENSE b/src/disk/minivhd/LICENSE new file mode 100644 index 000000000..2997be44a --- /dev/null +++ b/src/disk/minivhd/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2019 Sherman Perry + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
\ No newline at end of file
diff --git a/src/disk/minivhd/cwalk.c b/src/disk/minivhd/cwalk.c
new file mode 100644
index 000000000..265de0f72
--- /dev/null
+++ b/src/disk/minivhd/cwalk.c
@@ -0,0 +1,1421 @@
+#include <assert.h>
+#include <ctype.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <string.h>
+#include "cwalk.h"
+/**
+ * We try to default to a different path style depending on the operating
+ * system. So this should detect whether we should use windows or unix paths.
+ */
+#if defined(WIN32) || defined(_WIN32) || \
+  defined(__WIN32) && !defined(__CYGWIN__)
+static enum cwk_path_style path_style = CWK_STYLE_WINDOWS;
+#else
+static enum cwk_path_style path_style = CWK_STYLE_UNIX;
+#endif
+
+/**
+ * This is a list of separators used in different styles. Windows can read
+ * multiple separators, but it generally outputs just a backslash. The output
+ * will always use the first character for the output.
+ */
+static const char *separators[] = {[CWK_STYLE_WINDOWS] = "\\/",
+  [CWK_STYLE_UNIX] = "/"};
+
+/**
+ * A joined path represents multiple path strings which are concatenated, but
+ * not (necessarily) stored in contiguous memory. The joined path allows to
+ * iterate over the segments as if it was one piece of path.
+ */
+struct cwk_segment_joined
+{
+  struct cwk_segment segment;
+  const char **paths;
+  size_t path_index;
+};
+
+static size_t cwk_path_output_sized(char *buffer, size_t buffer_size,
+  size_t position, const char *str, size_t length)
+{
+  size_t amount_written;
+
+  // First we determine the amount which we can write to the buffer. There are
+  // three cases. In the first case we have enough to store the whole string in
+  // it. In the second one we can only store a part of it, and in the third we
+  // have no space left.
+  if (buffer_size > position + length) {
+    amount_written = length;
+  } else if (buffer_size > position) {
+    amount_written = buffer_size - position;
+  } else {
+    amount_written = 0;
+  }
+
+  // If we actually want to write out something we will do that here. No '\0'
+  // is appended at this point, the callers terminate the string afterwards
+  // with cwk_path_terminate_output. We use memmove since regions may overlap.
+  if (amount_written > 0) {
+    memmove(&buffer[position], str, amount_written);
+  }
+
+  // Return the theoretical length which would have been written when everything
+  // would have fit in the buffer.
+  return length;
+}
+
+static size_t cwk_path_output_current(char *buffer, size_t buffer_size,
+  size_t position)
+{
+  // We output a "current" directory, which is a single character. This
+  // character is currently not style dependent.
+  return cwk_path_output_sized(buffer, buffer_size, position, ".", 1);
+}
+
+static size_t cwk_path_output_back(char *buffer, size_t buffer_size,
+  size_t position)
+{
+  // We output a "back" directory, which has two characters. These
+  // characters are currently not style dependent.
+  return cwk_path_output_sized(buffer, buffer_size, position, "..", 2);
+}
+
+static size_t cwk_path_output_separator(char *buffer, size_t buffer_size,
+  size_t position)
+{
+  // We output a separator, which is a single character.
+  return cwk_path_output_sized(buffer, buffer_size, position,
+    separators[path_style], 1);
+}
+
+static size_t cwk_path_output_dot(char *buffer, size_t buffer_size,
+  size_t position)
+{
+  // We output a dot, which is a single character. This is used for extensions.
+  return cwk_path_output_sized(buffer, buffer_size, position, ".", 1);
+}
+
+static size_t cwk_path_output(char *buffer, size_t buffer_size, size_t position,
+  const char *str)
+{
+  size_t length;
+
+  // This just does a sized output internally, but first measuring the
+  // null-terminated string.
+  length = strlen(str);
+  return cwk_path_output_sized(buffer, buffer_size, position, str, length);
+}
+
+static void cwk_path_terminate_output(char *buffer, size_t buffer_size,
+  size_t pos)
+{
+  // Place the '\0' at the given position, or on the last byte of the buffer if
+  // the output was truncated. Nothing can be written to a zero-sized buffer.
+  if (buffer_size > 0) {
+    if (pos >= buffer_size) {
+      buffer[buffer_size - 1] = '\0';
+    } else {
+      buffer[pos] = '\0';
+    }
+  }
+}
+
+static bool cwk_path_is_string_equal(const char *first, const char *second,
+  size_t n)
+{
+  // If the path style is UNIX, we will compare case sensitively. This can be
+  // done easily using strncmp.
+  if (path_style == CWK_STYLE_UNIX) {
+    return strncmp(first, second, n) == 0;
+  }
+
+  // However, if this is windows we will have to compare case insensitively.
+  // Since there is no standard method to do that we will have to do it on our
+  // own.
+  while (*first && *second && n > 0) {
+    // We can consider the string to be not equal if the two lowercase
+    // characters are not equal. tolower() must be given unsigned char values.
+    if (tolower((unsigned char)*first++) != tolower((unsigned char)*second++)) {
+      return false;
+    }
+
+    --n;
+  }
+
+  // We can consider the string to be equal if we either reached n == 0 or both
+  // cursors point to a null character.
+  return n == 0 || (*first == '\0' && *second == '\0');
+}
+
+static const char *cwk_path_find_next_stop(const char *c)
+{
+  // We just move forward until we find a '\0' or a separator, which will be our
+  // next "stop".
+  while (*c != '\0' && !cwk_path_is_separator(c)) {
+    ++c;
+  }
+
+  // Return the pointer of the next stop.
+  return c;
+}
+
+static const char *cwk_path_find_previous_stop(const char *begin, const char *c)
+{
+  // We just move back until we find a separator or reach the beginning of the
+  // path, which will be our previous "stop".
+  while (c > begin && !cwk_path_is_separator(c)) {
+    --c;
+  }
+
+  // Return the pointer to the previous stop. We have to return the first
+  // character after the separator, not on the separator itself.
+  if (cwk_path_is_separator(c)) {
+    return c + 1;
+  } else {
+    return c;
+  }
+}
+
+static bool cwk_path_get_first_segment_without_root(const char *path,
+  const char *segments, struct cwk_segment *segment)
+{
+  // Let's remember the path. We will move the path pointer afterwards, that's
+  // why this has to be done first.
+  segment->path = path;
+  segment->segments = segments;
+
+  // Now let's check whether this is an empty string. An empty string has no
+  // segment it could use.
+  if (*segments == '\0') {
+    return false;
+  }
+
+  // If the string starts with separators, we will jump over those. If there is
+  // only a slash and a '\0' after it, we can't determine the first segment
+  // since there is none.
+  while (cwk_path_is_separator(segments)) {
+    ++segments;
+    if (*segments == '\0') {
+      return false;
+    }
+  }
+
+  // So this is the beginning of our segment.
+  segment->begin = segments;
+
+  // Now let's determine the end of the segment, which we do by moving the path
+  // pointer further until we find a separator.
+  segments = cwk_path_find_next_stop(segments);
+
+  // And finally, calculate the size of the segment by subtracting the position
+  // from the end.
+  segment->size = segments - segment->begin;
+  segment->end = segments;
+
+  // Tell the caller that we found a segment.
+  return true;
+}
+
+static bool cwk_path_get_last_segment_without_root(const char *path,
+  struct cwk_segment *segment)
+{
+  // Now this is fairly similar to the normal algorithm, however, it will assume
+  // that there is no root in the path. So we grab the first segment at this
+  // position, assuming there is no root.
+  if (!cwk_path_get_first_segment_without_root(path, path, segment)) {
+    return false;
+  }
+
+  // Now we find our last segment. The segment struct of the caller
+  // will contain the last segment, since the function we call here will not
+  // change the segment struct when it reaches the end.
+  while (cwk_path_get_next_segment(segment)) {
+    // We just loop until there is no other segment left.
+  }
+
+  return true;
+}
+
+static bool cwk_path_get_first_segment_joined(const char **paths,
+  struct cwk_segment_joined *sj)
+{
+  bool result;
+
+  // Prepare the first segment. We position the joined segment on the first path
+  // and assign the path array to the struct.
+  sj->path_index = 0;
+  sj->paths = paths;
+
+  // We loop through all paths until we find one which has a segment. The result
+  // is stored in a variable, so we can let the caller know whether we found one
+  // or not.
+  result = false;
+  while (paths[sj->path_index] != NULL &&
+         (result = cwk_path_get_first_segment(paths[sj->path_index],
+            &sj->segment)) == false) {
+    ++sj->path_index;
+  }
+
+  return result;
+}
+
+static bool cwk_path_get_next_segment_joined(struct cwk_segment_joined *sj)
+{
+  bool result;
+
+  if (sj->paths[sj->path_index] == NULL) {
+    // We already reached the end of all paths, so there is no other segment
+    // left.
+    return false;
+  } else if (cwk_path_get_next_segment(&sj->segment)) {
+    // There was another segment on the current path, so we are good to
+    // continue.
+    return true;
+  }
+
+  // We try to move to the next path which has a segment available. We must at
+  // least move one further since the current path reached the end.
+  result = false;
+
+  do {
+    ++sj->path_index;
+
+    // And we obviously have to stop this loop if there are no more paths left.
+    if (sj->paths[sj->path_index] == NULL) {
+      break;
+    }
+
+    // Grab the first segment of the next path and determine whether this path
+    // has anything useful in it. There is one more thing we have to consider
+    // here - for the first time we do this we want to skip the root, but
+    // afterwards we will consider that to be part of the segments.
+    result = cwk_path_get_first_segment_without_root(sj->paths[sj->path_index],
+      sj->paths[sj->path_index], &sj->segment);
+
+  } while (!result);
+
+  // Finally, report the result back to the caller.
+  return result;
+}
+
+static bool cwk_path_get_previous_segment_joined(struct cwk_segment_joined *sj)
+{
+  bool result;
+
+  if (*sj->paths == NULL) {
+    // It's possible that there is no initialized segment available in the
+    // struct since there are no paths. In that case we can return false, since
+    // there is no previous segment.
+    return false;
+  } else if (cwk_path_get_previous_segment(&sj->segment)) {
+    // Now we try to get the previous segment from the current path. If we can
+    // do that successfully, we can let the caller know that we found one.
+    return true;
+  }
+
+  result = false;
+
+  do {
+    // We are done once we reached index 0. In that case there are no more
+    // segments left.
+    if (sj->path_index == 0) {
+      break;
+    }
+
+    // There is another path which we have to inspect. So we decrease the path
+    // index.
+    --sj->path_index;
+
+    // If this is the first path we will have to consider that this path might
+    // include a root, otherwise we just treat it as a segment.
+    if (sj->path_index == 0) {
+      result = cwk_path_get_last_segment(sj->paths[sj->path_index],
+        &sj->segment);
+    } else {
+      result = cwk_path_get_last_segment_without_root(sj->paths[sj->path_index],
+        &sj->segment);
+    }
+
+  } while (!result);
+
+  return result;
+}
+
+static bool cwk_path_segment_back_will_be_removed(struct cwk_segment_joined *sj)
+{
+  enum cwk_segment_type type;
+  int counter;
+
+  // We are handling back segments here. We must verify how many back segments
+  // and how many normal segments come before this one to decide whether we keep
+  // or remove it.
+
+  // The counter tracks the normal segments before our current segment which
+  // are still there to be popped off. If the counter goes above zero it means
+  // that our segment will be popped as well.
+  counter = 0;
+
+  // We loop over all previous segments until we either reach the beginning,
+  // which means our segment will not be dropped or the counter goes above zero.
+  while (cwk_path_get_previous_segment_joined(sj)) {
+
+    // Now grab the type. The type determines whether we will increase or
+    // decrease the counter. We don't handle a CWK_CURRENT frame here since it
+    // has no influence.
+    type = cwk_path_get_segment_type(&sj->segment);
+    if (type == CWK_NORMAL) {
+      // This is a normal segment. The normal segment will increase the counter
+      // since it neutralizes one back segment. If we go above zero we can
+      // return immediately.
+      ++counter;
+      if (counter > 0) {
+        return true;
+      }
+    } else if (type == CWK_BACK) {
+      // A CWK_BACK segment will reduce the counter by one. We can not remove a
+      // back segment as long we are not above zero since we don't have the
+      // opposite normal segment which we would remove.
+      --counter;
+    }
+  }
+
+  // We never got a count larger than zero, so we will keep this segment alive.
+  return false;
+}
+
+static bool cwk_path_segment_normal_will_be_removed(
+  struct cwk_segment_joined *sj)
+{
+  enum cwk_segment_type type;
+  int counter;
+
+  // The counter determines how many segments are above our current segment,
+  // which will be popped off before us. If the counter goes below zero it means
+  // that our segment will be popped as well.
+  counter = 0;
+
+  // We loop over all following segments until we either reach the end, which
+  // means our segment will not be dropped or the counter goes below zero.
+  while (cwk_path_get_next_segment_joined(sj)) {
+
+    // First, grab the type. The type determines whether we will increase or
+    // decrease the counter. We don't handle a CWK_CURRENT frame here since it
+    // has no influence.
+    type = cwk_path_get_segment_type(&sj->segment);
+    if (type == CWK_NORMAL) {
+      // This is a normal segment. The normal segment will increase the counter
+      // since it will be removed by a "../" before us.
+      ++counter;
+    } else if (type == CWK_BACK) {
+      // A CWK_BACK segment will reduce the counter by one. If we are below zero
+      // we can return immediately.
+      --counter;
+      if (counter < 0) {
+        return true;
+      }
+    }
+  }
+
+  // We never got a negative count, so we will keep this segment alive.
+  return false;
+}
+
+static bool
+cwk_path_segment_will_be_removed(const struct cwk_segment_joined *sj,
+  bool absolute)
+{
+  enum cwk_segment_type type;
+  struct cwk_segment_joined sjc;
+
+  // We copy the joined path so we don't need to modify it.
+  sjc = *sj;
+
+  // First we check whether this is a CWK_CURRENT segment, or a CWK_BACK segment
+  // in an absolute path, since those will always be dropped.
+  type = cwk_path_get_segment_type(&sj->segment);
+  if (type == CWK_CURRENT) {
+    return true;
+  } else if (type == CWK_BACK && absolute) {
+    return true;
+  } else if (type == CWK_BACK) {
+    return cwk_path_segment_back_will_be_removed(&sjc);
+  } else {
+    return cwk_path_segment_normal_will_be_removed(&sjc);
+  }
+}
+
+static bool
+cwk_path_segment_joined_skip_invisible(struct cwk_segment_joined *sj,
+  bool absolute)
+{
+  // Advance until we are on a segment which will be part of the normalized
+  // path. We return false if all remaining segments would be removed.
+  while (cwk_path_segment_will_be_removed(sj, absolute)) {
+    if (!cwk_path_get_next_segment_joined(sj)) {
+      return false;
+    }
+  }
+
+  return true;
+}
+
+static void cwk_path_get_root_windows(const char *path, size_t *length)
+{
+  const char *c;
+  bool is_device_path;
+
+  // A device path is a path which starts with "\\." or "\\?". A device path can
+  // be a UNC path as well, in which case it will take up one more segment.
+  is_device_path = false;
+
+  // We can not determine the root if this is an empty string. So we set the
+  // length to zero and cancel the whole thing.
+  c = path;
+  *length = 0;
+  if (!*c) {
+    return;
+  }
+
+  // Now we have to verify whether this is a windows network path (UNC), which
+  // we will consider our root.
+  if (cwk_path_is_separator(c)) {
+    ++c;
+
+    // Check whether the path starts with a single back slash, which means this
+    // is not a network path - just a normal path starting with a backslash.
+    if (!cwk_path_is_separator(c)) {
+      // Okay, this is not a network path but we still use the backslash as a
+      // root.
+      ++(*length);
+      return;
+    }
+
+    // Yes, this is a network or device path. Skip the previous separator. Now
+    // we need to determine whether this is a device path. We might advance one
+    // character here if the server name starts with a '?' or a '.', but that's
+    // fine since we will search for a separator afterwards anyway.
+    ++c;
+    is_device_path = (*c == '?' || *c == '.') && cwk_path_is_separator(++c);
+    if (is_device_path) {
+      // That's a device path, and the root must be either "\\.\" or "\\?\"
+      // which is 4 characters long. (at least that's how Windows
+      // GetFullPathName behaves.)
+      *length = 4;
+      return;
+    }
+
+    // We will grab anything up to the next stop. The next stop might be a '\0'
+    // or another separator. That will be the server name.
+    c = cwk_path_find_next_stop(c);
+
+    // If this is a separator and not the end of a string we will have to
+    // include it. However, if this is a '\0' we must not skip it.
+    while (cwk_path_is_separator(c)) {
+      ++c;
+    }
+
+    // We are now skipping the shared folder name, which will end after the
+    // next stop.
+    c = cwk_path_find_next_stop(c);
+
+    // Then there might be a separator at the end. We will include that as well,
+    // it will mark the path as absolute.
+    if (cwk_path_is_separator(c)) {
+      ++c;
+    }
+
+    // Finally, calculate the size of the root.
+    *length = c - path;
+    return;
+  }
+
+  // Move to the next and check whether this is a colon.
+  if (*++c == ':') {
+    *length = 2;
+
+    // Now check whether this is a backslash (or slash). If it is not, we could
+    // assume that the next character is a '\0' if it is a valid path. However,
+    // we will not assume that - since ':' is not valid in a path it must be a
+    // mistake by the caller then. We will try to understand it anyway.
+    if (cwk_path_is_separator(++c)) {
+      *length = 3;
+    }
+  }
+}
+
+static void cwk_path_get_root_unix(const char *path, size_t *length)
+{
+  // The slash of the unix path represents the root. There is no root if there
+  // is no slash.
+  *length = cwk_path_is_separator(path) ? 1 : 0;
+}
+
+static bool cwk_path_is_root_absolute(const char *path, size_t length)
+{
+  // This is definitely not absolute if there is no root.
+  if (length == 0) {
+    return false;
+  }
+
+  // If there is a separator at the end of the root, we can safely consider this
+  // to be an absolute path.
+  return cwk_path_is_separator(&path[length - 1]);
+}
+
+static size_t cwk_path_join_and_normalize_multiple(const char **paths,
+  char *buffer, size_t buffer_size)
+{
+  size_t pos;
+  bool absolute, has_segment_output;
+  struct cwk_segment_joined sj;
+
+  // We initialize the position after the root, which should get us started.
+  cwk_path_get_root(paths[0], &pos);
+
+  // Determine whether the path is absolute or not. We need that to determine
+  // later on whether we can remove superfluous "../" or not.
+  absolute = cwk_path_is_root_absolute(paths[0], pos);
+
+  // First copy the root to the output. We will not modify the root.
+  cwk_path_output_sized(buffer, buffer_size, 0, paths[0], pos);
+
+  // So we just grab the first segment. If there is no segment at all, only the
+  // root which we have copied above will be output.
+  if (!cwk_path_get_first_segment_joined(paths, &sj)) {
+    goto done;
+  }
+
+  // Let's assume that we don't have any segment output for now. We will toggle
+  // this flag once there is some output.
+  has_segment_output = false;
+
+  do {
+    // Check whether we have to drop this segment because of resolving a
+    // relative path or because it is a CWK_CURRENT segment.
+    if (cwk_path_segment_will_be_removed(&sj, absolute)) {
+      continue;
+    }
+
+    // Remember that we have segment output, so we can handle the trailing slash
+    // later on. This is necessary since we might have segments but they are all
+    // removed.
+    has_segment_output = true;
+
+    // Write out the segment but keep in mind that we need to follow the
+    // buffer size limitations. That's why we use the path output functions
+    // here.
+    pos += cwk_path_output_sized(buffer, buffer_size, pos, sj.segment.begin,
+      sj.segment.size);
+    pos += cwk_path_output_separator(buffer, buffer_size, pos);
+  } while (cwk_path_get_next_segment_joined(&sj));
+
+  // Remove the trailing slash, but only if we have segment output. We don't
+  // want to remove anything from the root.
+  if (has_segment_output) {
+    --pos;
+  } else if (pos == 0) {
+    // This may happen if the path is relative and all segments have been
+    // removed. We can not have an empty output - an empty output means we stay
+    // in the current directory. So we will output a ".".
+    assert(absolute == false);
+    pos += cwk_path_output_current(buffer, buffer_size, pos);
+  }
+
+  // We must append a '\0' in any case, unless the buffer size is zero. A
+  // buffer size of zero means we can not.
+done:
+  cwk_path_terminate_output(buffer, buffer_size, pos);
+
+  // And finally let our caller know about the total size of the normalized
+  // path.
+  return pos;
+}
+
+size_t cwk_path_get_absolute(const char *base, const char *path, char *buffer,
+  size_t buffer_size)
+{
+  size_t i;
+  const char *paths[4];
+
+  // The base should be an absolute path if the caller is using the API
+  // correctly. However, they might not and in that case we will prepend a fake
+  // root at the beginning.
+  if (cwk_path_is_absolute(base)) {
+    i = 0;
+  } else {
+    paths[0] = "/";
+    i = 1;
+  }
+
+  if (cwk_path_is_absolute(path)) {
+    // If the submitted path is not relative the base path becomes irrelevant.
+    // We will only normalize the submitted path instead.
+    paths[i++] = path;
+    paths[i] = NULL;
+  } else {
+    // Otherwise we append the relative path to the base path and normalize it.
+    // The result will be a new absolute path.
+    paths[i++] = base;
+    paths[i++] = path;
+    paths[i] = NULL;
+  }
+
+  // Finally join everything together and normalize it.
+  return cwk_path_join_and_normalize_multiple(paths, buffer, buffer_size);
+}
+
+static void cwk_path_skip_segments_until_diverge(struct cwk_segment_joined *bsj,
+  struct cwk_segment_joined *osj, bool absolute, bool *base_available,
+  bool *other_available)
+{
+  // Now looping over all segments until they start to diverge. A path may
+  // diverge if two segments are not equal or if one path reaches the end.
+  do {
+
+    // Check whether there is anything available after we skip everything which
+    // is invisible. We do that for both paths, since we want to let the caller
+    // know which path has some trailing segments after they diverge.
+    *base_available = cwk_path_segment_joined_skip_invisible(bsj, absolute);
+    *other_available = cwk_path_segment_joined_skip_invisible(osj, absolute);
+
+    // We are done if one or both of those paths reached the end. They either
+    // diverge or both reached the end - but in both cases we can not continue
+    // here.
+    if (!*base_available || !*other_available) {
+      break;
+    }
+
+    // Compare the content of both segments. We are done if they are not equal,
+    // since they diverge. Segments of different length are never equal.
+    if (bsj->segment.size != osj->segment.size ||
+        !cwk_path_is_string_equal(bsj->segment.begin, osj->segment.begin,
+          bsj->segment.size)) {
+      break;
+    }
+
+    // We keep going until one of those segments reached the end. The next
+    // segment might be invisible, but we will check for that in the beginning
+    // of the loop once again.
+    *base_available = cwk_path_get_next_segment_joined(bsj);
+    *other_available = cwk_path_get_next_segment_joined(osj);
+  } while (*base_available && *other_available);
+}
+
+size_t cwk_path_get_relative(const char *base_directory, const char *path,
+  char *buffer, size_t buffer_size)
+{
+  size_t pos, base_root_length, path_root_length;
+  bool absolute, base_available, other_available, has_output;
+  const char *base_paths[2], *other_paths[2];
+  struct cwk_segment_joined bsj, osj;
+
+  pos = 0;
+
+  // First we compare the roots of those two paths. If the roots are not equal
+  // we can't continue, since there is no way to get a relative path from
+  // different roots. The output is still terminated, so it is a valid string.
+  cwk_path_get_root(base_directory, &base_root_length);
+  cwk_path_get_root(path, &path_root_length);
+  if (base_root_length != path_root_length ||
+      !cwk_path_is_string_equal(base_directory, path, base_root_length)) {
+    cwk_path_terminate_output(buffer, buffer_size, pos);
+    return pos;
+  }
+
+  // Verify whether this is an absolute path. We need to know that since we can
+  // remove all back-segments if it is.
+  absolute = cwk_path_is_root_absolute(base_directory, base_root_length);
+
+  // Initialize our joined segments. This will allow us to use the internal
+  // functions to skip until diverge and invisible. We only have one path in
+  // them though.
+  base_paths[0] = base_directory;
+  base_paths[1] = NULL;
+  other_paths[0] = path;
+  other_paths[1] = NULL;
+  cwk_path_get_first_segment_joined(base_paths, &bsj);
+  cwk_path_get_first_segment_joined(other_paths, &osj);
+
+  // Okay, now we skip until the segments diverge. We don't have anything to do
+  // with the segments which are equal.
+  cwk_path_skip_segments_until_diverge(&bsj, &osj, absolute, &base_available,
+    &other_available);
+
+  // Assume there is no output until we have got some. We will need this
+  // information later on to remove trailing slashes or alternatively output a
+  // current-segment.
+  has_output = false;
+
+  // So if we still have some segments left in the base path we will now output
+  // a back segment for all of them.
+  if (base_available) {
+    do {
+      // Skip any invisible segment. We don't care about those and we don't need
+      // to navigate back because of them.
+      if (!cwk_path_segment_joined_skip_invisible(&bsj, absolute)) {
+        break;
+      }
+
+      // Toggle the flag if we have output. We need to remember that, since we
+      // want to remove the trailing slash.
+      has_output = true;
+
+      // Output the back segment and a separator. No need to worry about the
+      // superfluous segment since it will be removed later on.
+      pos += cwk_path_output_back(buffer, buffer_size, pos);
+      pos += cwk_path_output_separator(buffer, buffer_size, pos);
+    } while (cwk_path_get_next_segment_joined(&bsj));
+  }
+
+  // And if we have some segments available of the target path we will output
+  // all of those.
+  if (other_available) {
+    do {
+      // Again, skip any invisible segments since we don't need to navigate into
+      // them.
+      if (!cwk_path_segment_joined_skip_invisible(&osj, absolute)) {
+        break;
+      }
+
+      // Toggle the flag if we have output. We need to remember that, since we
+      // want to remove the trailing slash.
+      has_output = true;
+
+      // Output the current segment and a separator. No need to worry about the
+      // superfluous segment since it will be removed later on.
+      pos += cwk_path_output_sized(buffer, buffer_size, pos, osj.segment.begin,
+        osj.segment.size);
+      pos += cwk_path_output_separator(buffer, buffer_size, pos);
+    } while (cwk_path_get_next_segment_joined(&osj));
+  }
+
+  // If we have some output by now we will have to remove the trailing slash. We
+  // simply do that by moving back one character. The terminate output function
+  // will then place the '\0' on this position. Otherwise, if there is no
+  // output, we will have to output a "current directory", since the target path
+  // points to the base path.
+  if (has_output) {
+    --pos;
+  } else {
+    pos += cwk_path_output_current(buffer, buffer_size, pos);
+  }
+
+  // Finally, we can terminate the output - which means we place a '\0' at the
+  // current position or at the end of the buffer.
+  cwk_path_terminate_output(buffer, buffer_size, pos);
+
+  return pos;
+}
+
+size_t cwk_path_join(const char *path_a, const char *path_b, char *buffer,
+  size_t buffer_size)
+{
+  const char *paths[3];
+
+  // This is simple. We will just create an array with the two paths which we
+  // wish to join.
+  paths[0] = path_a;
+  paths[1] = path_b;
+  paths[2] = NULL;
+
+  // And then call the join and normalize function which will do the hard work
+  // for us.
+  return cwk_path_join_and_normalize_multiple(paths, buffer, buffer_size);
+}
+
+size_t cwk_path_join_multiple(const char **paths, char *buffer,
+  size_t buffer_size)
+{
+  // We can just call the internal join and normalize function for this one,
+  // since it will handle everything.
+  return cwk_path_join_and_normalize_multiple(paths, buffer, buffer_size);
+}
+
+void cwk_path_get_root(const char *path, size_t *length)
+{
+  // We use a different implementation here based on the configuration of the
+  // library.
+  if (path_style == CWK_STYLE_WINDOWS) {
+    cwk_path_get_root_windows(path, length);
+  } else {
+    cwk_path_get_root_unix(path, length);
+  }
+}
+
+size_t cwk_path_change_root(const char *path, const char *new_root,
+  char *buffer, size_t buffer_size)
+{
+  const char *tail;
+  size_t root_length, path_length, tail_length, new_root_length, new_path_size;
+
+  // First we need to determine the actual size of the root which we will
+  // change.
+  cwk_path_get_root(path, &root_length);
+
+  // Now we determine the sizes of the new root and the path. We need that to
+  // determine the size of the part after the root (the tail).
+  new_root_length = strlen(new_root);
+  path_length = strlen(path);
+
+  // Okay, now we calculate the position of the tail and the length of it.
+  tail = path + root_length;
+  tail_length = path_length - root_length;
+
+  // We first output the tail and then the new root, that's because the source
+  // path and the buffer may be overlapping. This way the root will not
+  // overwrite the tail.
+  cwk_path_output_sized(buffer, buffer_size, new_root_length, tail,
+    tail_length);
+  cwk_path_output_sized(buffer, buffer_size, 0, new_root, new_root_length);
+
+  // Finally we calculate the size of the new path and terminate the output
+  // with a '\0'.
+  new_path_size = tail_length + new_root_length;
+  cwk_path_terminate_output(buffer, buffer_size, new_path_size);
+
+  return new_path_size;
+}
+
+bool cwk_path_is_absolute(const char *path)
+{
+  size_t length;
+
+  // We grab the root of the path. The length will be zero if the path has no
+  // root at all.
+  cwk_path_get_root(path, &length);
+
+  // Now we can determine whether the root is absolute or not.
+  return cwk_path_is_root_absolute(path, length);
+}
+
+bool cwk_path_is_relative(const char *path)
+{
+  // The path is relative if it is not absolute.
+  return !cwk_path_is_absolute(path);
+}
+
+void cwk_path_get_basename(const char *path, const char **basename,
+  size_t *length)
+{
+  struct cwk_segment segment;
+
+  // We get the last segment of the path. The last segment will contain the
+  // basename if there is any. If there are no segments we will set the basename
+  // to NULL and the length to 0.
+  if (!cwk_path_get_last_segment(path, &segment)) {
+    *basename = NULL;
+    *length = 0;
+    return;
+  }
+
+  // Now we can just output the segment contents, since that's our basename.
+  // There might be trailing separators after the basename, but the size does
+  // not include those.
+  *basename = segment.begin;
+  *length = segment.size;
+}
+
+size_t cwk_path_change_basename(const char *path, const char *new_basename,
+  char *buffer, size_t buffer_size)
+{
+  struct cwk_segment segment;
+  size_t pos, root_size, new_basename_size;
+
+  // First we try to get the last segment. We may only have a root without any
+  // segments, in which case we will create one.
+  if (!cwk_path_get_last_segment(path, &segment)) {
+
+    // So there is no segment in this path. First we grab the root and output
+    // that. We are not going to modify the root in any way.
+    cwk_path_get_root(path, &root_size);
+    pos = cwk_path_output_sized(buffer, buffer_size, 0, path, root_size);
+
+    // We have to trim the separators from the beginning of the new basename.
+    // This is quite easy to do.
+    while (cwk_path_is_separator(new_basename)) {
+      ++new_basename;
+    }
+
+    // Now we measure the length of the new basename, this is a two step
+    // process. First we find the '\0' character at the end of the string.
+    new_basename_size = strlen(new_basename);
+
+    // And then we trim the separators at the end of the basename until we reach
+    // the first valid character.
+    while (new_basename_size > 0 &&
+           cwk_path_is_separator(&new_basename[new_basename_size - 1])) {
+      --new_basename_size;
+    }
+
+    // Now we will output the new basename after the root.
+    pos += cwk_path_output_sized(buffer, buffer_size, pos, new_basename,
+      new_basename_size);
+
+    // And finally terminate the output and return the total size of the path.
+    cwk_path_terminate_output(buffer, buffer_size, pos);
+    return pos;
+  }
+
+  // If there is a last segment we can just forward this call, which is fairly
+  // easy.
+  return cwk_path_change_segment(&segment, new_basename, buffer, buffer_size);
+}
+
+void cwk_path_get_dirname(const char *path, size_t *length)
+{
+  struct cwk_segment segment;
+
+  // We get the last segment of the path. The last segment will contain the
+  // basename if there is any. If there are no segments we will set the length
+  // to 0.
+  if (!cwk_path_get_last_segment(path, &segment)) {
+    *length = 0;
+    return;
+  }
+
+  // We can now return the length from the beginning of the string up to the
+  // beginning of the last segment.
+  *length = segment.begin - path;
+}
+
+bool cwk_path_get_extension(const char *path, const char **extension,
+  size_t *length)
+{
+  struct cwk_segment segment;
+  const char *c;
+
+  // We get the last segment of the path. The last segment will contain the
+  // extension if there is any.
+  if (!cwk_path_get_last_segment(path, &segment)) {
+    return false;
+  }
+
+  // Now we search for a dot within the segment. If there is a dot, we consider
+  // the rest of the segment the extension. We walk from the end towards the
+  // beginning to find the last dot, and never step in front of the segment.
+  for (c = segment.end; *c != '.'; --c) {
+    if (c == segment.begin) {
+      // We couldn't find any extension.
+      return false;
+    }
+  }
+
+  // Okay, we found an extension. We can stop looking now.
+  *extension = c;
+  *length = segment.end - c;
+  return true;
+}
+
+bool cwk_path_has_extension(const char *path)
+{
+  const char *extension;
+  size_t length;
+
+  // We just wrap the get_extension call which will then do the work for us.
+  return cwk_path_get_extension(path, &extension, &length);
+}
+
+size_t cwk_path_change_extension(const char *path, const char *new_extension,
+  char *buffer, size_t buffer_size)
+{
+  struct cwk_segment segment;
+  const char *c, *old_extension;
+  size_t pos, root_size, trail_size, new_extension_size;
+
+  // First we try to get the last segment. We may only have a root without any
+  // segments, in which case we will create one.
+  if (!cwk_path_get_last_segment(path, &segment)) {
+
+    // So there is no segment in this path. First we grab the root and output
+    // that. We are not going to modify the root in any way.
If there is no + // root, this will end up with a root size 0, and nothing will be written. + cwk_path_get_root(path, &root_size); + pos = cwk_path_output_sized(buffer, buffer_size, 0, path, root_size); + + // Add a dot if the submitted value doesn't have any. + if (*new_extension != '.') { + pos += cwk_path_output_dot(buffer, buffer_size, pos); + } + + // And finally terminate the output and return the total size of the path. + pos += cwk_path_output(buffer, buffer_size, pos, new_extension); + cwk_path_terminate_output(buffer, buffer_size, pos); + return pos; + } + + // Now we seek the old extension in the last segment, which we will replace + // with the new one. If there is no old extension, it will point to the end of + // the segment. + old_extension = segment.end; + for (c = segment.begin; c < segment.end; ++c) { + if (*c == '.') { + old_extension = c; + } + } + + pos = cwk_path_output_sized(buffer, buffer_size, 0, segment.path, + old_extension - segment.path); + + // If the new extension starts with a dot, we will skip that dot. We always + // output exactly one dot before the extension. If the extension contains + // multiple dots, we will output those as part of the extension. + if (*new_extension == '.') { + ++new_extension; + } + + // We calculate the size of the new extension, including the dot, in order to + // output the trail - which is any part of the path coming after the + // extension. We must output this first, since the buffer may overlap with the + // submitted path - and it would be overridden by longer extensions. + new_extension_size = strlen(new_extension) + 1; + trail_size = cwk_path_output(buffer, buffer_size, pos + new_extension_size, + segment.end); + + // Finally we output the dot and the new extension. The new extension itself + // doesn't contain the dot anymore, so we must output that first. 
+ pos += cwk_path_output_dot(buffer, buffer_size, pos); + pos += cwk_path_output(buffer, buffer_size, pos, new_extension); + + // Now we terminate the output with a null-terminating character, but before + // we do that we must add the size of the trail to the position which we + // output before. + pos += trail_size; + cwk_path_terminate_output(buffer, buffer_size, pos); + + // And the position is our output size now. + return pos; +} + +size_t cwk_path_normalize(const char *path, char *buffer, size_t buffer_size) +{ + const char *paths[2]; + + // Now we initialize the paths which we will normalize. Since this function + // only supports submitting a single path, we will only add that one. + paths[0] = path; + paths[1] = NULL; + + return cwk_path_join_and_normalize_multiple(paths, buffer, buffer_size); +} + +size_t cwk_path_get_intersection(const char *path_base, const char *path_other) +{ + bool absolute; + size_t base_root_length, other_root_length; + const char *end; + const char *paths_base[2], *paths_other[2]; + struct cwk_segment_joined base, other; + + // We first compare the two roots. We just return zero if they are not equal. + // This will also happen to return zero if the paths are mixed relative and + // absolute. + // NOTE(review): other_root_length is computed below but only + // base_root_length is passed to the comparison - confirm that roots of + // different lengths are handled as intended. + cwk_path_get_root(path_base, &base_root_length); + cwk_path_get_root(path_other, &other_root_length); + if (!cwk_path_is_string_equal(path_base, path_other, base_root_length)) { + return 0; + } + + // Configure our paths. We just have a single path in here for now. + paths_base[0] = path_base; + paths_base[1] = NULL; + paths_other[0] = path_other; + paths_other[1] = NULL; + + // So we get the first segment of both paths. If one of those paths doesn't + // have any segment, we will return the length of the root. + if (!cwk_path_get_first_segment_joined(paths_base, &base) || + !cwk_path_get_first_segment_joined(paths_other, &other)) { + return base_root_length; + } + + // We now determine whether the path is absolute or not.
This is required + // because it will ignore removed segments, and this behaves differently if + // the path is absolute. However, we only need to check the base path because + // we are guaranteed that both paths are either relative or absolute. + absolute = cwk_path_is_root_absolute(path_base, base_root_length); + + // We must keep track of the end of the previous segment. Initially, this is + // set to the end of the root. This means that the root length is returned if + // the first segment is not equal. + end = path_base + base_root_length; + + // Now we loop over both segments until one of them reaches the end or their + // contents are not equal. + do { + // We skip all segments which will be removed in each path, since we want to + // know about the true path. + if (!cwk_path_segment_joined_skip_invisible(&base, absolute) || + !cwk_path_segment_joined_skip_invisible(&other, absolute)) { + break; + } + + if (!cwk_path_is_string_equal(base.segment.begin, other.segment.begin, + base.segment.size)) { + // So the contents of those two segments are not equal. We will return the + // size up to the end of the last matching segment. + return end - path_base; + } + + // Remember the end of the previous segment before we go to the next one. + end = base.segment.end; + } while (cwk_path_get_next_segment_joined(&base) && + cwk_path_get_next_segment_joined(&other)); + + // Now we calculate the length up to the last point where our paths pointed to + // the same place. + return end - path_base; +} + +bool cwk_path_get_first_segment(const char *path, struct cwk_segment *segment) +{ + size_t length; + const char *segments; + + // We skip the root since that's not part of the first segment. The root is + // treated as a separate entity. + cwk_path_get_root(path, &length); + segments = path + length; + + // Now, after we skipped the root we can continue and find the actual segment + // content.
+ return cwk_path_get_first_segment_without_root(path, segments, segment); +} + +bool cwk_path_get_last_segment(const char *path, struct cwk_segment *segment) +{ + // We first grab the first segment. This might be our last segment as well, + // but we don't know yet. There is no last segment if there is no first + // segment, so we return false in that case. + if (!cwk_path_get_first_segment(path, segment)) { + return false; + } + + // Now we find our last segment. The segment struct of the caller + // will contain the last segment, since the function we call here will not + // change the segment struct when it reaches the end. + while (cwk_path_get_next_segment(segment)) { + // We just loop until there is no other segment left. + } + + return true; +} + +bool cwk_path_get_next_segment(struct cwk_segment *segment) +{ + const char *c; + + // First we jump to the end of the previous segment. The first character must + // be either a '\0' or a separator. + c = segment->begin + segment->size; + if (*c == '\0') { + return false; + } + + // Now we skip all separators until we reach something else. We are not yet + // guaranteed to have a segment, since the string could just end afterwards. + assert(cwk_path_is_separator(c)); + do { + ++c; + } while (cwk_path_is_separator(c)); + + // If the string ends here, we can safely assume that there is no other + // segment after this one. + if (*c == '\0') { + return false; + } + + // Now we are safe to assume there is a segment. We store the beginning of + // this segment in the segment struct of the caller. + segment->begin = c; + + // And now determine the size of this segment, and store it in the struct of + // the caller as well. + c = cwk_path_find_next_stop(c); + segment->end = c; + segment->size = c - segment->begin; + + // Tell the caller that we found a segment.
+ return true; +} + +bool cwk_path_get_previous_segment(struct cwk_segment *segment) +{ + const char *c; + + // The current position might point to the first character of the path, which + // means there are no previous segments available. + c = segment->begin; + if (c <= segment->segments) { + return false; + } + + // We move towards the beginning of the path until we either reached the + // beginning or the character is no separator anymore. + do { + --c; + if (c <= segment->segments) { + // So we reached the beginning here and there is no segment. So we return + // false and don't change the segment structure submitted by the caller. + return false; + } + } while (cwk_path_is_separator(c)); + + // We are guaranteed now that there is another segment, since we moved before + // the previous separator and did not reach the segment path beginning. + segment->end = c + 1; + segment->begin = cwk_path_find_previous_stop(segment->segments, c); + segment->size = segment->end - segment->begin; + + return true; +} + +enum cwk_segment_type cwk_path_get_segment_type( + const struct cwk_segment *segment) +{ + // We just make a string comparison with the segment contents and return the + // appropriate type. + if (strncmp(segment->begin, ".", segment->size) == 0) { + return CWK_CURRENT; + } else if (strncmp(segment->begin, "..", segment->size) == 0) { + return CWK_BACK; + } + + return CWK_NORMAL; +} + +bool cwk_path_is_separator(const char *str) +{ + const char *c; + + // We loop over all characters in the read symbols. + c = separators[path_style]; + while (*c) { + if (*c == *str) { + return true; + } + + ++c; + } + + return false; +} + +size_t cwk_path_change_segment(struct cwk_segment *segment, const char *value, + char *buffer, size_t buffer_size) +{ + size_t pos, value_size, tail_size; + + // First we have to output the head, which is the whole string up to the + // beginning of the segment. This part of the path will just stay the same. 
+ pos = cwk_path_output_sized(buffer, buffer_size, 0, segment->path, + segment->begin - segment->path); + + // In order to trim the submitted value, we will skip any separator at the + // beginning of it and behave as if it was never there. + while (cwk_path_is_separator(value)) { + ++value; + } + + // Now we determine the length of the value. In order to do that we first + // locate the '\0'. + value_size = 0; + while (value[value_size]) { + ++value_size; + } + + // Since we trim separators at the beginning and at the end of the value we + // have to subtract from the size until there are either no more characters + // left or the last character is no separator. + while (value_size > 0 && cwk_path_is_separator(&value[value_size - 1])) { + --value_size; + } + + // We also have to determine the tail size, which is the part of the string + // following the current segment. This part will not change. + tail_size = strlen(segment->end); + + // Now we output the tail. We have to do that, because if the buffer and the + // source are overlapping we would overwrite the tail if the value is + // increasing in length. + cwk_path_output_sized(buffer, buffer_size, pos + value_size, segment->end, + tail_size); + + // Finally we can output the value in the middle of the head and the tail, + // where we have enough space to fit the whole trimmed value. + pos += cwk_path_output_sized(buffer, buffer_size, pos, value, value_size); + + // Now we add the tail size to the current position and terminate the output - + // basically, ensure that there is a '\0' at the end of the buffer. + pos += tail_size; + cwk_path_terminate_output(buffer, buffer_size, pos); + + // And now tell the caller how long the whole path would be. + return pos; +} + +enum cwk_path_style cwk_path_guess_style(const char *path) +{ + const char *c; + size_t root_length; + struct cwk_segment segment; + + // First we determine the root.
Only windows roots can be longer than a single + // slash, so if we can determine that it starts with something like "C:", we + // know that this is a windows path. + cwk_path_get_root_windows(path, &root_length); + if (root_length > 1) { + return CWK_STYLE_WINDOWS; + } + + // Next we check for slashes. Windows uses backslashes, while unix uses + // forward slashes. Windows actually supports both, but our best guess is to + // assume windows with backslashes and unix with forward slashes. + for (c = path; *c; ++c) { + if (*c == *separators[CWK_STYLE_UNIX]) { + return CWK_STYLE_UNIX; + } else if (*c == *separators[CWK_STYLE_WINDOWS]) { + return CWK_STYLE_WINDOWS; + } + } + + // This path does not have any slashes. We grab the last segment (which + // actually must be the first one), and determine whether the segment starts + // with a dot. A dot is a hidden folder or file in the UNIX world, in that + // case we assume the path to have UNIX style. + if (!cwk_path_get_last_segment(path, &segment)) { + // We couldn't find any segments, so we default to a UNIX path style since + // there is no way to make any assumptions. + return CWK_STYLE_UNIX; + } + + if (*segment.begin == '.') { + return CWK_STYLE_UNIX; + } + + // And finally we check whether the last segment contains a dot. If it + // contains a dot, that might be an extension. Windows is more likely to have + // file names with extensions, so our guess would be windows. + for (c = segment.begin; *c; ++c) { + if (*c == '.') { + return CWK_STYLE_WINDOWS; + } + } + + // All our checks failed, so we will return a default value which is currently + // UNIX. + return CWK_STYLE_UNIX; +} + +void cwk_path_set_style(enum cwk_path_style style) +{ + // We can just set the global path style variable and then the behaviour for + // all functions will change accordingly. 
+ assert(style == CWK_STYLE_UNIX || style == CWK_STYLE_WINDOWS); + path_style = style; +} + +enum cwk_path_style cwk_path_get_style(void) +{ + // Simply return the path style which we store in a global variable. + return path_style; +} diff --git a/src/disk/minivhd/cwalk.h b/src/disk/minivhd/cwalk.h new file mode 100644 index 000000000..baa5d432d --- /dev/null +++ b/src/disk/minivhd/cwalk.h @@ -0,0 +1,457 @@ +#pragma once + +#ifndef CWK_LIBRARY_H +#define CWK_LIBRARY_H + +#include +#include + +/** + * A segment represents a single component of a path. For instance, on linux a + * path might look like this "/var/log/", which consists of two segments "var" + * and "log". + */ +struct cwk_segment +{ + const char *path; + const char *segments; + const char *begin; + const char *end; + size_t size; +}; + +/** + * The segment type can be used to identify whether a segment is a special + * segment or not. + * + * CWK_NORMAL - normal folder or file segment + * CWK_CURRENT - "./" current folder segment + * CWK_BACK - "../" relative back navigation segment + */ +enum cwk_segment_type +{ + CWK_NORMAL, + CWK_CURRENT, + CWK_BACK +}; + +/** + * @brief Determines the style which is used for the path parsing and + * generation. + */ +enum cwk_path_style +{ + CWK_STYLE_WINDOWS, + CWK_STYLE_UNIX +}; + +/** + * @brief Generates an absolute path based on a base. + * + * This function generates an absolute path based on a base path and another + * path. It is guaranteed to return an absolute path. If the second submitted + * path is absolute, it will override the base path. The result will be written + * to a buffer, which might be truncated if the buffer is not large enough to + * hold the full path. However, the truncated result will always be + * null-terminated. The returned value is the amount of characters which the + * resulting path would take if it was not truncated (excluding the + * null-terminating character). 
+ * + * @param base The base path on which the relative path will be applied. + * @param path The relative path which will be applied on the base path. + * @param buffer The buffer where the result will be written to. + * @param buffer_size The size of the result buffer. + * @return Returns the total amount of characters of the new absolute path. + */ +size_t cwk_path_get_absolute(const char *base, const char *path, char *buffer, + size_t buffer_size); + +/** + * @brief Generates a relative path based on a base. + * + * This function generates a relative path based on a base path and another + * path. It determines how to get to the submitted path, starting from the base + * directory. The result will be written to a buffer, which might be truncated + * if the buffer is not large enough to hold the full path. However, the + * truncated result will always be null-terminated. The returned value is the + * amount of characters which the resulting path would take if it was not + * truncated (excluding the null-terminating character). + * + * @param base_directory The base path from which the relative path will start. + * @param path The target path where the relative path will point to. + * @param buffer The buffer where the result will be written to. + * @param buffer_size The size of the result buffer. + * @return Returns the total amount of characters of the full path. + */ +size_t cwk_path_get_relative(const char *base_directory, const char *path, + char *buffer, size_t buffer_size); + +/** + * @brief Joins two paths together. + * + * This function generates a new path by combining the two submitted paths. It + * will remove double separators, and unlike cwk_path_get_absolute it permits + * the use of two relative paths to combine. The result will be written to a + * buffer, which might be truncated if the buffer is not large enough to hold + * the full path. However, the truncated result will always be null-terminated. 
+ * The returned value is the amount of characters which the resulting path would + * take if it was not truncated (excluding the null-terminating character). + * + * @param path_a The path which comes first. + * @param path_b The second path which comes after the first. + * @param buffer The buffer where the result will be written to. + * @param buffer_size The size of the result buffer. + * @return Returns the total amount of characters of the full, combined path. + */ +size_t cwk_path_join(const char *path_a, const char *path_b, char *buffer, + size_t buffer_size); + +/** + * @brief Joins multiple paths together. + * + * This function generates a new path by joining multiple paths together. It + * will remove double separators, and unlike cwk_path_get_absolute it permits + * the use of multiple relative paths to combine. The last path of the submitted + * string array must be set to NULL. The result will be written to a buffer, + * which might be truncated if the buffer is not large enough to hold the full + * path. However, the truncated result will always be null-terminated. The + * returned value is the amount of characters which the resulting path would + * take if it was not truncated (excluding the null-terminating character). + * + * @param paths An array of paths which will be joined. + * @param buffer The buffer where the result will be written to. + * @param buffer_size The size of the result buffer. + * @return Returns the total amount of characters of the full, combined path. + */ +size_t cwk_path_join_multiple(const char **paths, char *buffer, + size_t buffer_size); + +/** + * @brief Determines the root of a path. + * + * This function determines the root of a path by finding its length. The root + * always starts at the submitted path. If the path has no root, the length will + * be set to zero. + * + * @param path The path which will be inspected. + * @param length The output of the root length.
+ */ +void cwk_path_get_root(const char *path, size_t *length); + +/** + * @brief Changes the root of a path. + * + * This function changes the root of a path. It does not normalize the result. + * The result will be written to a buffer, which might be truncated if the + * buffer is not large enough to hold the full path. However, the truncated + * result will always be null-terminated. The returned value is the amount of + * characters which the resulting path would take if it was not truncated + * (excluding the null-terminating character). + * + * @param path The original path which will get a new root. + * @param new_root The new root which will be placed in the path. + * @param buffer The output buffer where the result is written to. + * @param buffer_size The size of the output buffer where the result is written + * to. + * @return Returns the total amount of characters of the new path. + */ +size_t cwk_path_change_root(const char *path, const char *new_root, + char *buffer, size_t buffer_size); + +/** + * @brief Determine whether the path is absolute or not. + * + * This function checks whether the path is an absolute path or not. A path is + * considered to be absolute if the root ends with a separator. + * + * @param path The path which will be checked. + * @return Returns true if the path is absolute or false otherwise. + */ +bool cwk_path_is_absolute(const char *path); + +/** + * @brief Determine whether the path is relative or not. + * + * This function checks whether the path is a relative path or not. A path is + * considered to be relative if the root does not end with a separator. + * + * @param path The path which will be checked. + * @return Returns true if the path is relative or false otherwise. + */ +bool cwk_path_is_relative(const char *path); + +/** + * @brief Gets the basename of a file path. + * + * This function gets the basename of a file path. A pointer to the beginning of + * the basename will be returned through the basename parameter. 
This pointer + * will be positioned on the first letter after the separator. The length of the + * basename will be returned through the length parameter. The length will be + * set to zero and the basename to NULL if there is no basename available. + * + * @param path The path which will be inspected. + * @param basename The output of the basename pointer. + * @param length The output of the length of the basename. + */ +void cwk_path_get_basename(const char *path, const char **basename, + size_t *length); + +/** + * @brief Changes the basename of a file path. + * + * This function changes the basename of a file path. This function will not + * write out more than the specified buffer can contain. However, the generated + * string is always null-terminated - even if not the whole path is written out. + * The function returns the total number of characters the complete buffer would + * have, even if it was not written out completely. The path may be the same + * memory address as the buffer. + * + * @param path The original path which will be used for the modified path. + * @param new_basename The new basename which will replace the old one. + * @param buffer The buffer where the changed path will be written to. + * @param buffer_size The size of the result buffer where the changed path is + * written to. + * @return Returns the size which the complete new path would have if it was not + * truncated. + */ +size_t cwk_path_change_basename(const char *path, const char *new_basename, + char *buffer, size_t buffer_size); + +/** + * @brief Gets the dirname of a file path. + * + * This function determines the dirname of a file path and returns the length up + * to which character is considered to be part of it. If no dirname is found, + * the length will be set to zero. The beginning of the dirname is always equal + * to the submitted path pointer. + * + * @param path The path which will be inspected. + * @param length The length of the dirname.
+ */ +void cwk_path_get_dirname(const char *path, size_t *length); + +/** + * @brief Gets the extension of a file path. + * + * This function extracts the extension portion of a file path. A pointer to + * the beginning of the extension will be returned through the extension + * parameter if an extension is found and true is returned. This pointer will be + * positioned on the dot. The length of the extension name will be returned + * through the length parameter. If no extension is found both parameters won't + * be touched and false will be returned. + * + * @param path The path which will be inspected. + * @param extension The output of the extension pointer. + * @param length The output of the length of the extension. + * @return Returns true if an extension is found or false otherwise. + */ +bool cwk_path_get_extension(const char *path, const char **extension, + size_t *length); + +/** + * @brief Determines whether the file path has an extension. + * + * This function determines whether the submitted file path has an extension. + * This will evaluate to true if the last segment of the path contains a dot. + * + * @param path The path which will be inspected. + * @return Returns true if the path has an extension or false otherwise. + */ +bool cwk_path_has_extension(const char *path); + +/** + * @brief Changes the extension of a file path. + * + * This function changes the extension of a file name. The function will append + * an extension if the basename does not have an extension, or use the extension + * as a basename if the path does not have a basename. This function will not + * write out more than the specified buffer can contain. However, the generated + * string is always null-terminated - even if not the whole path is written out. + * The function returns the total number of characters the complete buffer would + * have, even if it was not written out completely. The path may be the same + * memory address as the buffer. 
+ * + * @param path The path which will be used to make the change. + * @param new_extension The extension which will be placed within the new path. + * @param buffer The output buffer where the result will be written to. + * @param buffer_size The size of the output buffer where the result will be + * written to. + * @return Returns the total size which the output would have if it was not + * truncated. + */ +size_t cwk_path_change_extension(const char *path, const char *new_extension, + char *buffer, size_t buffer_size); + +/** + * @brief Creates a normalized version of the path. + * + * This function creates a normalized version of the path within the specified + * buffer. This function will not write out more than the specified buffer can + * contain. However, the generated string is always null-terminated - even if + * not the whole path is written out. The function returns the total number of + * characters the complete buffer would have, even if it was not written out + * completely. The path may be the same memory address as the buffer. + * + * The following will be true for the normalized path: + * 1) "../" will be resolved. + * 2) "./" will be removed. + * 3) double separators will be fixed with a single separator. + * 4) separator suffixes will be removed. + * + * @param path The path which will be normalized. + * @param buffer The buffer where the new path is written to. + * @param buffer_size The size of the buffer. + * @return The size which the complete normalized path has if it was not + * truncated. + */ +size_t cwk_path_normalize(const char *path, char *buffer, size_t buffer_size); + +/** + * @brief Finds common portions in two paths. + * + * This function finds common portions in two paths and returns the number of + * characters from the beginning of the base path which are equal to the other + * path. + * + * @param path_base The base path which will be compared with the other path.
+ * @param path_other The other path which will be compared with the base path. + * @return Returns the number of characters which are common in the base path. + */ +size_t cwk_path_get_intersection(const char *path_base, const char *path_other); + +/** + * @brief Gets the first segment of a path. + * + * This function finds the first segment of a path. The position of the segment + * is set to the first character after the separator, and the length counts all + * characters until the next separator (excluding the separator). + * + * @param path The path which will be inspected. + * @param segment The segment which will be extracted. + * @return Returns true if there is a segment or false if there is none. + */ +bool cwk_path_get_first_segment(const char *path, struct cwk_segment *segment); + +/** + * @brief Gets the last segment of the path. + * + * This function gets the last segment of a path. This function may return false + * if the path doesn't contain any segments, in which case the submitted segment + * parameter is not modified. The position of the segment is set to the first + * character after the separator, and the length counts all characters until the + * end of the path (excluding the separator). + * + * @param path The path which will be inspected. + * @param segment The segment which will be extracted. + * @return Returns true if there is a segment or false if there is none. + */ +bool cwk_path_get_last_segment(const char *path, struct cwk_segment *segment); + +/** + * @brief Advances to the next segment. + * + * This function advances the current segment to the next segment. If there are + * no more segments left, the submitted segment structure will stay unchanged + * and false is returned. + * + * @param segment The current segment which will be advanced to the next one. + * @return Returns true if another segment was found or false otherwise.
+ */ +bool cwk_path_get_next_segment(struct cwk_segment *segment); + +/** + * @brief Moves to the previous segment. + * + * This function moves the current segment to the previous segment. If the + * current segment is the first one, the submitted segment structure will stay + * unchanged and false is returned. + * + * @param segment The current segment which will be moved to the previous one. + * @return Returns true if there is a segment before this one or false + * otherwise. + */ +bool cwk_path_get_previous_segment(struct cwk_segment *segment); + +/** + * @brief Gets the type of the submitted path segment. + * + * This function inspects the contents of the segment and determines the type of + * it. Currently, there are three types: CWK_NORMAL, CWK_CURRENT and CWK_BACK. A + * CWK_NORMAL segment is a normal folder or file entry. A CWK_CURRENT is a "./" + * and a CWK_BACK a "../" segment. + * + * @param segment The segment which will be inspected. + * @return Returns the type of the segment. + */ +enum cwk_segment_type cwk_path_get_segment_type( + const struct cwk_segment *segment); + +/** + * @brief Changes the content of a segment. + * + * This function overrides the content of a segment to the submitted value and + * outputs the whole new path to the submitted buffer. The result might require + * less or more space than before if the new value length differs from the + * original length. The output is truncated if the new path is larger than the + * submitted buffer size, but it is always null-terminated. The source of the + * segment and the submitted buffer may be the same. + * + * @param segment The segment which will be modified. + * @param value The new content of the segment. + * @param buffer The buffer where the modified path will be written to. + * @param buffer_size The size of the output buffer. + * @return Returns the total size which would have been written if the output + * was not truncated. 
+ */ +size_t cwk_path_change_segment(struct cwk_segment *segment, const char *value, + char *buffer, size_t buffer_size); + +/** + * @brief Checks whether the submitted pointer points to a separator. + * + * This function simply checks whether the submitted pointer points to a + * separator, which has to be null-terminated (but not necessarily after the + * separator). The function will return true if it is a separator, or false + * otherwise. + * + * @param str A pointer to a string. + * @return Returns true if it is a separator, or false otherwise. + */ +bool cwk_path_is_separator(const char *str); + +/** + * @brief Guesses the path style. + * + * This function guesses the path style based on a submitted path-string. The + * guessing will look at the root and the type of slashes contained in the path + * and return the style which is more likely used in the path. + * + * @param path The path which will be inspected. + * @return Returns the style which is most likely used for the path. + */ +enum cwk_path_style cwk_path_guess_style(const char *path); + +/** + * @brief Configures which path style is used. + * + * This function configures which path style is used. The following styles are + * currently supported. + * + * CWK_STYLE_WINDOWS: Use backslashes as a separator and volume for the root. + * CWK_STYLE_UNIX: Use slashes as a separator and a slash for the root. + * + * @param style The style which will be used from now on. + */ +void cwk_path_set_style(enum cwk_path_style style); + +/** + * @brief Gets the path style configuration. + * + * This function gets the style configuration which is currently used for the + * paths. This configuration determines how paths are parsed and generated. + * + * @return Returns the current path style configuration. 
+ */ +enum cwk_path_style cwk_path_get_style(void); + +#endif diff --git a/src/disk/minivhd/libxml2_encoding.c b/src/disk/minivhd/libxml2_encoding.c new file mode 100644 index 000000000..cb881a89b --- /dev/null +++ b/src/disk/minivhd/libxml2_encoding.c @@ -0,0 +1,447 @@ +/* + * encoding.c : implements the encoding conversion functions needed for XML + * + * Related specs: + * rfc2044 (UTF-8 and UTF-16) F. Yergeau Alis Technologies + * rfc2781 UTF-16, an encoding of ISO 10646, P. Hoffman, F. Yergeau + * [ISO-10646] UTF-8 and UTF-16 in Annexes + * [ISO-8859-1] ISO Latin-1 characters codes. + * [UNICODE] The Unicode Consortium, "The Unicode Standard -- + * Worldwide Character Encoding -- Version 1.0", Addison- + * Wesley, Volume 1, 1991, Volume 2, 1992. UTF-8 is + * described in Unicode Technical Report #4. + * [US-ASCII] Coded Character Set--7-bit American Standard Code for + * Information Interchange, ANSI X3.4-1986. + * + * See Copyright for the status of this software. + * + * daniel@veillard.com + * + * Original code for IsoLatin1 and UTF-16 by "Martin J. Duerst" + * + * Adapted and abridged for MiniVHD by Sherman Perry + */ +#include + +static int xmlLittleEndian = 1; + +/* Note: extracted from original 'void xmlInitCharEncodingHandlers(void)' function. Probes the first byte of a 16-bit value to detect the host byte order and records the result in xmlLittleEndian. */ +void xmlEncodingInit(void) +{ + unsigned short int tst = 0x1234; + unsigned char *ptr = (unsigned char *) &tst; + + if (*ptr == 0x12) xmlLittleEndian = 0; + else if (*ptr == 0x34) xmlLittleEndian = 1; +} + +/** + * UTF16LEToUTF8: + * @out: a pointer to an array of bytes to store the result + * @outlen: the length of @out + * @inb: a pointer to an array of UTF-16LE passed as a byte array + * @inlenb: the length of @inb in bytes + * + * Take a block of UTF-16LE ushorts in and try to convert it to an UTF-8 + * block of chars out. This function assumes the endian property + * is the same between the native type of this machine and the + * input one. 
+ * + * Returns the number of bytes written, or -1 if lack of space, or -2 + * if the transcoding fails (if *in is not a valid utf16 string) + * The value of *inlenb after return is the number of octets consumed + * if the return value is positive, else unpredictable. + */ +int UTF16LEToUTF8(unsigned char* out, int *outlen, + const unsigned char* inb, int *inlenb) +{ + unsigned char* outstart = out; + const unsigned char* processed = inb; + unsigned char* outend = out + *outlen; + unsigned short* in = (unsigned short*) inb; + unsigned short* inend; + unsigned int c, d, inlen; + unsigned char *tmp; + int bits; + + if ((*inlenb % 2) == 1) /* an odd trailing byte cannot form a 16-bit unit; ignore it */ + (*inlenb)--; + inlen = *inlenb / 2; + inend = in + inlen; + while ((in < inend) && (out - outstart + 5 < *outlen)) { /* stop early while a few output bytes are still free */ + if (xmlLittleEndian) { + c= *in++; + } else { + tmp = (unsigned char *) in; /* big-endian host: assemble the little-endian unit byte by byte */ + c = *tmp++; + c = c | (((unsigned int)*tmp) << 8); + in++; + } + if ((c & 0xFC00) == 0xD800) { /* surrogates */ + if (in >= inend) { /* (in > inend) shouldn't happen */ + break; + } + if (xmlLittleEndian) { + d = *in++; + } else { + tmp = (unsigned char *) in; + d = *tmp++; + d = d | (((unsigned int)*tmp) << 8); + in++; + } + if ((d & 0xFC00) == 0xDC00) { + c &= 0x03FF; + c <<= 10; + c |= d & 0x03FF; + c += 0x10000; + } + else { + *outlen = out - outstart; + *inlenb = processed - inb; + return(-2); + } + } + + /* assertion: c is a single UCS-4 value */ + if (out >= outend) + break; + if (c < 0x80) { *out++= c; bits= -6; } + else if (c < 0x800) { *out++= ((c >> 6) & 0x1F) | 0xC0; bits= 0; } + else if (c < 0x10000) { *out++= ((c >> 12) & 0x0F) | 0xE0; bits= 6; } + else { *out++= ((c >> 18) & 0x07) | 0xF0; bits= 12; } + + for ( ; bits >= 0; bits-= 6) { /* emit the continuation bytes, 6 payload bits each */ + if (out >= outend) + break; + *out++= ((c >> bits) & 0x3F) | 0x80; + } + processed = (const unsigned char*) in; /* only count input whose output was emitted */ + } + *outlen = out - outstart; + *inlenb = processed - inb; + return(*outlen); +} + +/** + * UTF8ToUTF16LE: + * @outb: a pointer to an array of bytes to store the result + * 
@outlen: the length of @outb + * @in: a pointer to an array of UTF-8 chars + * @inlen: the length of @in + * + * Take a block of UTF-8 chars in and try to convert it to an UTF-16LE + * block of chars out. + * + * Returns the number of bytes written, or -1 if outb, outlen or inlen is NULL, or -2 + * if the transcoding failed. + */ +int UTF8ToUTF16LE(unsigned char* outb, int *outlen, + const unsigned char* in, int *inlen) +{ + unsigned short* out = (unsigned short*) outb; + const unsigned char* processed = in; + const unsigned char *const instart = in; + unsigned short* outstart= out; + unsigned short* outend; + const unsigned char* inend; + unsigned int c, d; + int trailing; + unsigned char *tmp; + unsigned short tmp1, tmp2; + + /* UTF16LE encoding has no BOM */ + if ((out == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1); + if (in == NULL) { + *outlen = 0; + *inlen = 0; + return(0); + } + inend= in + *inlen; + outend = out + (*outlen / 2); /* *outlen is in bytes, out advances in 16-bit units */ + while (in < inend) { + d= *in++; + if (d < 0x80) { c= d; trailing= 0; } + else if (d < 0xC0) { + /* trailing byte in leading position */ + *outlen = (out - outstart) * 2; + *inlen = processed - instart; + return(-2); + } else if (d < 0xE0) { c= d & 0x1F; trailing= 1; } + else if (d < 0xF0) { c= d & 0x0F; trailing= 2; } + else if (d < 0xF8) { c= d & 0x07; trailing= 3; } + else { + /* no chance for this in UTF-16 */ + *outlen = (out - outstart) * 2; + *inlen = processed - instart; + return(-2); + } + + if (inend - in < trailing) { /* sequence is cut off by the end of the input */ + break; + } + + for ( ; trailing; trailing--) { + if ((in >= inend) || (((d= *in++) & 0xC0) != 0x80)) + break; + c <<= 6; + c |= d & 0x3F; + } + + /* assertion: c is a single UCS-4 value */ + if (c < 0x10000) { + if (out >= outend) + break; + if (xmlLittleEndian) { + *out++ = c; + } else { + tmp = (unsigned char *) out; /* big-endian host: store low byte first */ + *tmp = c ; + *(tmp + 1) = c >> 8 ; + out++; + } + } + else if (c < 0x110000) { + if (out+1 >= outend) /* a surrogate pair needs two output units */ + break; + c -= 0x10000; + if (xmlLittleEndian) { + *out++ = 0xD800 | (c >> 10); + 
*out++ = 0xDC00 | (c & 0x03FF); + } else { + tmp1 = 0xD800 | (c >> 10); + tmp = (unsigned char *) out; + *tmp = (unsigned char) tmp1; + *(tmp + 1) = tmp1 >> 8; + out++; + + tmp2 = 0xDC00 | (c & 0x03FF); + tmp = (unsigned char *) out; + *tmp = (unsigned char) tmp2; + *(tmp + 1) = tmp2 >> 8; + out++; + } + } + else + break; + processed = in; + } + *outlen = (out - outstart) * 2; + *inlen = processed - instart; + return(*outlen); +} + +/** + * UTF16BEToUTF8: + * @out: a pointer to an array of bytes to store the result + * @outlen: the length of @out + * @inb: a pointer to an array of UTF-16 passed as a byte array + * @inlenb: the length of @inb in bytes + * + * Take a block of UTF-16 ushorts in and try to convert it to an UTF-8 + * block of chars out. This function assumes the endian property + * is the same between the native type of this machine and the + * input one. + * + * Returns the number of bytes written, or -1 if lack of space, or -2 + * if the transcoding fails (if *in is not a valid utf16 string) + * The value of *inlenb after return is the number of octets consumed + * if the return value is positive, else unpredictable. 
+ */ +int UTF16BEToUTF8(unsigned char* out, int *outlen, + const unsigned char* inb, int *inlenb) +{ + unsigned char* outstart = out; + const unsigned char* processed = inb; + unsigned char* outend = out + *outlen; + unsigned short* in = (unsigned short*) inb; + unsigned short* inend; + unsigned int c, d, inlen; + unsigned char *tmp; + int bits; + + if ((*inlenb % 2) == 1) /* an odd trailing byte cannot form a 16-bit unit; ignore it */ + (*inlenb)--; + inlen = *inlenb / 2; + inend= in + inlen; + while (in < inend) { + if (xmlLittleEndian) { + tmp = (unsigned char *) in; /* little-endian host: assemble the big-endian unit byte by byte */ + c = *tmp++; + c = c << 8; + c = c | (unsigned int) *tmp; + in++; + } else { + c= *in++; + } + if ((c & 0xFC00) == 0xD800) { /* surrogates */ + if (in >= inend) { /* (in > inend) shouldn't happen */ + *outlen = out - outstart; + *inlenb = processed - inb; + return(-2); + } + if (xmlLittleEndian) { + tmp = (unsigned char *) in; + d = *tmp++; + d = d << 8; + d = d | (unsigned int) *tmp; + in++; + } else { + d= *in++; + } + if ((d & 0xFC00) == 0xDC00) { + c &= 0x03FF; + c <<= 10; + c |= d & 0x03FF; + c += 0x10000; + } + else { + *outlen = out - outstart; + *inlenb = processed - inb; + return(-2); + } + } + + /* assertion: c is a single UCS-4 value */ + if (out >= outend) + break; + if (c < 0x80) { *out++= c; bits= -6; } + else if (c < 0x800) { *out++= ((c >> 6) & 0x1F) | 0xC0; bits= 0; } + else if (c < 0x10000) { *out++= ((c >> 12) & 0x0F) | 0xE0; bits= 6; } + else { *out++= ((c >> 18) & 0x07) | 0xF0; bits= 12; } + + for ( ; bits >= 0; bits-= 6) { /* emit the continuation bytes, 6 payload bits each */ + if (out >= outend) + break; + *out++= ((c >> bits) & 0x3F) | 0x80; + } + processed = (const unsigned char*) in; /* only count input whose output was emitted */ + } + *outlen = out - outstart; + *inlenb = processed - inb; + return(*outlen); +} + +/** + * UTF8ToUTF16BE: + * @outb: a pointer to an array of bytes to store the result + * @outlen: the length of @outb + * @in: a pointer to an array of UTF-8 chars + * @inlen: the length of @in + * + * Take a block of UTF-8 chars in and try to convert it to an UTF-16BE + * block of chars out. 
+ * + * Returns the number of bytes written, or -1 if outb, outlen or inlen is NULL, or -2 + * if the transcoding failed. On every return past the NULL checks *outlen is the number of bytes written and *inlen the number of bytes consumed. + */ +int UTF8ToUTF16BE(unsigned char* outb, int *outlen, + const unsigned char* in, int *inlen) +{ + unsigned short* out = (unsigned short*) outb; + const unsigned char* processed = in; + const unsigned char *const instart = in; + unsigned short* outstart= out; + unsigned short* outend; + const unsigned char* inend; + unsigned int c, d; + int trailing; + unsigned char *tmp; + unsigned short tmp1, tmp2; + + /* UTF-16BE has no BOM */ + if ((outb == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1); + if (in == NULL) { + *outlen = 0; + *inlen = 0; + return(0); + } + inend= in + *inlen; + outend = out + (*outlen / 2); /* *outlen is in bytes, out advances in 16-bit units */ + while (in < inend) { + d= *in++; + if (d < 0x80) { c= d; trailing= 0; } + else if (d < 0xC0) { + /* trailing byte in leading position */ + *outlen = (out - outstart) * 2; /* report bytes, not 16-bit units, like the normal exit */ + *inlen = processed - instart; + return(-2); + } else if (d < 0xE0) { c= d & 0x1F; trailing= 1; } + else if (d < 0xF0) { c= d & 0x0F; trailing= 2; } + else if (d < 0xF8) { c= d & 0x07; trailing= 3; } + else { + /* no chance for this in UTF-16 */ + *outlen = (out - outstart) * 2; /* report bytes, not 16-bit units, like the normal exit */ + *inlen = processed - instart; + return(-2); + } + + if (inend - in < trailing) { /* sequence is cut off by the end of the input */ + break; + } + + for ( ; trailing; trailing--) { + if ((in >= inend) || (((d= *in++) & 0xC0) != 0x80)) break; + c <<= 6; + c |= d & 0x3F; + } + + /* assertion: c is a single UCS-4 value */ + if (c < 0x10000) { + if (out >= outend) break; + if (xmlLittleEndian) { + tmp = (unsigned char *) out; /* little-endian host: store high byte first */ + *tmp = c >> 8; + *(tmp + 1) = c; + out++; + } else { + *out++ = c; + } + } + else if (c < 0x110000) { + if (out+1 >= outend) break; /* a surrogate pair needs two output units */ + c -= 0x10000; + if (xmlLittleEndian) { + tmp1 = 0xD800 | (c >> 10); + tmp = (unsigned char *) out; + *tmp = tmp1 >> 8; + *(tmp + 1) = (unsigned char) tmp1; + out++; + + tmp2 = 0xDC00 | (c & 0x03FF); + tmp = (unsigned char *) out; + *tmp = tmp2 >> 8; + *(tmp + 1) = (unsigned char) tmp2; + out++; + } else { + 
*out++ = 0xD800 | (c >> 10); + *out++ = 0xDC00 | (c & 0x03FF); + } + } + else + break; + processed = in; /* only count input whose output was emitted */ + } + *outlen = (out - outstart) * 2; + *inlen = processed - instart; + return(*outlen); +} + +/* This file is licenced under the MIT licence as follows: + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is fur- +nished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FIT- +NESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
*/ \ No newline at end of file diff --git a/src/disk/minivhd/libxml2_encoding.h b/src/disk/minivhd/libxml2_encoding.h new file mode 100644 index 000000000..831aea4af --- /dev/null +++ b/src/disk/minivhd/libxml2_encoding.h @@ -0,0 +1,12 @@ +#ifndef LIBXML2_ENCODING_H +#define LIBXML2_ENCODING_H + +#include +typedef uint16_t mvhd_utf16; + +void xmlEncodingInit(void); +int UTF16LEToUTF8(unsigned char* out, int *outlen, const unsigned char* inb, int *inlenb); +int UTF8ToUTF16LE(unsigned char* outb, int *outlen, const unsigned char* in, int *inlen); +int UTF16BEToUTF8(unsigned char* out, int *outlen, const unsigned char* inb, int *inlenb); +int UTF8ToUTF16BE(unsigned char* outb, int *outlen, const unsigned char* in, int *inlen); +#endif \ No newline at end of file diff --git a/src/disk/minivhd/minivhd.h b/src/disk/minivhd/minivhd.h new file mode 100644 index 000000000..0d925f71a --- /dev/null +++ b/src/disk/minivhd/minivhd.h @@ -0,0 +1,269 @@ +#ifndef MINIVHD_H +#define MINIVHD_H + +#include +#include +#include + +extern int mvhd_errno; + +typedef enum MVHDError { + MVHD_ERR_MEM = -128, /**< first error code; the enumerators below count up from here */ + MVHD_ERR_FILE, + MVHD_ERR_NOT_VHD, + MVHD_ERR_TYPE, + MVHD_ERR_FOOTER_CHECKSUM, + MVHD_ERR_SPARSE_CHECKSUM, + MVHD_ERR_UTF_TRANSCODING_FAILED, + MVHD_ERR_UTF_SIZE, + MVHD_ERR_PATH_REL, + MVHD_ERR_PATH_LEN, + MVHD_ERR_PAR_NOT_FOUND, + MVHD_ERR_INVALID_PAR_UUID, + MVHD_ERR_INVALID_GEOM, + MVHD_ERR_INVALID_SIZE, + MVHD_ERR_INVALID_BLOCK_SIZE, + MVHD_ERR_INVALID_PARAMS, + MVHD_ERR_CONV_SIZE, + MVHD_ERR_TIMESTAMP +} MVHDError; + +typedef enum MVHDType { + MVHD_TYPE_FIXED = 2, + MVHD_TYPE_DYNAMIC = 3, + MVHD_TYPE_DIFF = 4 +} MVHDType; + +typedef enum MVHDBlockSize { + MVHD_BLOCK_DEFAULT = 0, /**< use the default: 2 MB blocks */ + MVHD_BLOCK_SMALL = 1024, /**< 1024 sectors per block: 512 KB blocks */ + MVHD_BLOCK_LARGE = 4096 /**< 4096 sectors per block: 2 MB blocks */ +} MVHDBlockSize; + +typedef struct MVHDGeom { + uint16_t cyl; /**< number of cylinders */ + uint8_t heads; /**< number of heads */ + uint8_t spt; /**< sectors per track */ +} MVHDGeom; + +typedef void (*mvhd_progress_callback)(uint32_t current_sector, uint32_t
total_sectors); + +typedef struct MVHDCreationOptions { + int type; /**< MVHD_TYPE_FIXED, MVHD_TYPE_DYNAMIC, or MVHD_TYPE_DIFF */ + char* path; /**< Absolute path of the new VHD file */ + char* parent_path; /**< For MVHD_TYPE_DIFF, this is the absolute path of the VHD's parent. For non-diff VHDs, this should be NULL. */ + uint64_t size_in_bytes; /**< Total size of the VHD's virtual disk in bytes. Must be a multiple of 512. If 0, the size is auto-calculated from the geometry field. Ignored for MVHD_TYPE_DIFF. */ + MVHDGeom geometry; /**< The geometry of the VHD. If set to 0, the geometry is auto-calculated from the size_in_bytes field. */ + uint32_t block_size_in_sectors; /**< MVHD_BLOCK_LARGE or MVHD_BLOCK_SMALL, or 0 for the default value. The number of sectors per block. */ + mvhd_progress_callback progress_callback; /**< Optional; if not NULL, gets called to indicate progress on the creation operation. Only applies to MVHD_TYPE_FIXED. */ +} MVHDCreationOptions; + +typedef struct MVHDMeta MVHDMeta; + +/** + * \brief Output a string from a MiniVHD error number + * + * \param [in] err is the error number to return string from + * + * \return Error string + */ +const char* mvhd_strerr(MVHDError err); + +/** + * \brief A simple test to see if a given file is a VHD + * + * \param [in] f file to test + * + * \retval true if f is a VHD + * \retval false if f is not a VHD + */ +bool mvhd_file_is_vhd(FILE* f); + +/** + * \brief Open a VHD image for reading and/or writing + * + * The returned pointer contains all required values and structures (and files) to + * read and write to a VHD file. + * + * Remember to call mvhd_close() when you are finished. + * + * \param [in] path Absolute path to VHD file. Relative path will cause issues when opening + * a differencing VHD file + * \param [in] readonly set this to true to open the VHD in a read only manner + * \param [out] err will be set if the VHD fails to open. 
Value could be one of + * MVHD_ERR_MEM, MVHD_ERR_FILE, MVHD_ERR_NOT_VHD, MVHD_ERR_FOOTER_CHECKSUM, MVHD_ERR_SPARSE_CHECKSUM, + * MVHD_ERR_TYPE, MVHD_ERR_TIMESTAMP + * If MVHD_ERR_FILE is set, mvhd_errno will be set to the appropriate system errno value + * + * \return MVHDMeta pointer. If NULL, check err. err may also be set to MVHD_ERR_TIMESTAMP if + * opening a differencing VHD. + */ +MVHDMeta* mvhd_open(const char* path, bool readonly, int* err); + +/** + * \brief Update the parent modified timestamp in the VHD file + * + * Differencing VHDs use a parent last modified timestamp to try and detect if the + * parent has been modified after the child has been created. However, this is rather + * fragile and can be broken by moving/copying the parent. Also, MS DiskPart does not + * set this timestamp in the child :( + * + * Be careful when using this function that you don't update the timestamp after the + * parent actually has been modified. + * + * \param [in] vhdm Differencing VHD to update. + * \param [out] err will be set if the timestamp could not be updated + * + * \return non-zero on error, 0 on success + */ +int mvhd_diff_update_par_timestamp(MVHDMeta* vhdm, int* err); + +/** + * \brief Create a fixed VHD image + * + * \param [in] path is the absolute path to the image to create + * \param [in] geom is the HDD geometry of the image to create. Determines final image size + * \param [out] err indicates what error occurred, if any + * \param [in] progress_callback optional; if not NULL, gets called to indicate progress on the creation operation + * + * \retval NULL if an error occurs. Check value of *err for actual error. Otherwise returns pointer to a MVHDMeta struct + */ +MVHDMeta* mvhd_create_fixed(const char* path, MVHDGeom geom, int* err, mvhd_progress_callback progress_callback); + +/** + * \brief Create sparse (dynamic) VHD image. 
+ * + * \param [in] path is the absolute path to the VHD file to create + * \param [in] geom is the HDD geometry of the image to create. Determines final image size + * \param [out] err indicates what error occurred, if any + * + * \return NULL if an error occurs. Check value of *err for actual error. Otherwise returns pointer to a MVHDMeta struct + */ +MVHDMeta* mvhd_create_sparse(const char* path, MVHDGeom geom, int* err); + +/** + * \brief Create differencing VHD image. + * + * \param [in] path is the absolute path to the VHD file to create + * \param [in] par_path is the absolute path to a parent image. If NULL, a sparse image is created, otherwise create a differencing image + * \param [out] err indicates what error occurred, if any + * + * \return NULL if an error occurs. Check value of *err for actual error. Otherwise returns pointer to a MVHDMeta struct + */ +MVHDMeta* mvhd_create_diff(const char* path, const char* par_path, int* err); + +/** + * \brief Create a VHD using the provided options + * + * Use mvhd_create_ex if you want more control over the VHD's options. For quick creation, you can use mvhd_create_fixed, mvhd_create_sparse, or mvhd_create_diff. + * + * \param [in] options the VHD creation options. + * \param [out] err indicates what error occurred, if any + * + * \retval NULL if an error occurs. Check value of *err for actual error. Otherwise returns pointer to a MVHDMeta struct + */ +MVHDMeta* mvhd_create_ex(MVHDCreationOptions options, int* err); + +/** + * \brief Safely close a VHD image + * + * \param [in] vhdm MiniVHD data structure to close + */ +void mvhd_close(MVHDMeta* vhdm); + +/** + * \brief Calculate hard disk geometry from a provided size + * + * The VHD format uses Cylinder, Heads, Sectors per Track (CHS) when accessing the disk. + * The size of the disk can be determined from C * H * S * sector_size. + * + * Note, maximum geometry size (in bytes) is 65535 * 16 * 255 * 512, which is 127GB. 
+ * However, the maximum VHD size is 2040GB. For VHDs larger than 127GB, the geometry size will be + * smaller than the actual VHD size. + * + * This function determines the appropriate CHS geometry from a provided size in bytes. + * The calculations used are those provided in "Appendix: CHS Calculation" from the document + * "Virtual Hard Disk Image Format Specification" provided by Microsoft. + * + * \param [in] size the desired VHD image size, in bytes + * + * \return MVHDGeom the calculated geometry. This can be used in the appropriate create functions. + */ +MVHDGeom mvhd_calculate_geometry(uint64_t size); + +/** + * \brief Convert a raw disk image to a fixed VHD image + * + * \param [in] utf8_raw_path is the path of the raw image to convert + * \param [in] utf8_vhd_path is the path of the VHD to create + * \param [out] err indicates what error occurred, if any + * + * \return NULL if an error occurs. Check value of *err for actual error. Otherwise returns pointer to a MVHDMeta struct + */ +MVHDMeta* mvhd_convert_to_vhd_fixed(const char* utf8_raw_path, const char* utf8_vhd_path, int* err); + +/** + * \brief Convert a raw disk image to a sparse VHD image + * + * \param [in] utf8_raw_path is the path of the raw image to convert + * \param [in] utf8_vhd_path is the path of the VHD to create + * \param [out] err indicates what error occurred, if any + * + * \return NULL if an error occurs. Check value of *err for actual error. Otherwise returns pointer to a MVHDMeta struct + */ +MVHDMeta* mvhd_convert_to_vhd_sparse(const char* utf8_raw_path, const char* utf8_vhd_path, int* err); + +/** + * \brief Convert a VHD image to a raw disk image + * + * \param [in] utf8_vhd_path is the path of the VHD to convert + * \param [in] utf8_raw_path is the path of the raw image to create + * \param [out] err indicates what error occurred, if any + * + * \return NULL if an error occurs. Check value of *err for actual error. 
Otherwise returns the raw disk image FILE pointer + */ +FILE* mvhd_convert_to_raw(const char* utf8_vhd_path, const char* utf8_raw_path, int *err); + +/** + * \brief Read sectors from VHD file + * + * Read num_sectors, beginning at offset from the VHD file into a buffer + * + * \param [in] vhdm MiniVHD data structure + * \param [in] offset the sector offset from which to start reading from + * \param [in] num_sectors the number of sectors to read + * \param [out] out_buff the buffer to write sector data to + * + * \return the number of sectors that were not read, or zero + */ +int mvhd_read_sectors(MVHDMeta* vhdm, uint32_t offset, int num_sectors, void* out_buff); + +/** + * \brief Write sectors to VHD file + * + * Write num_sectors, beginning at offset, from a buffer into the VHD file + * + * \param [in] vhdm MiniVHD data structure + * \param [in] offset the sector offset from which to start writing to + * \param [in] num_sectors the number of sectors to write + * \param [in] in_buff the buffer to read sector data from + * + * \return the number of sectors that were not written, or zero + */ +int mvhd_write_sectors(MVHDMeta* vhdm, uint32_t offset, int num_sectors, void* in_buff); + +/** + * \brief Write zeroed sectors to VHD file + * + * Write num_sectors, beginning at offset, of zero data into the VHD file. + * We reuse the existing write functions, with a preallocated zero buffer as + * our source buffer. 
+ * + * \param [in] vhdm MiniVHD data structure + * \param [in] offset the sector offset from which to start writing to + * \param [in] num_sectors the number of sectors to write + * + * \return the number of sectors that were not written, or zero + */ +int mvhd_format_sectors(MVHDMeta* vhdm, uint32_t offset, int num_sectors); +#endif \ No newline at end of file diff --git a/src/disk/minivhd/minivhd_convert.c b/src/disk/minivhd/minivhd_convert.c new file mode 100644 index 000000000..231e0f9b8 --- /dev/null +++ b/src/disk/minivhd/minivhd_convert.c @@ -0,0 +1,107 @@ +#include +#include +#include +#include +#include +#include "minivhd_create.h" +#include "minivhd_internal.h" +#include "minivhd_util.h" +#include "minivhd.h" + +static FILE* mvhd_open_existing_raw_img(const char* utf8_raw_path, MVHDGeom* geom, int* err); /* opens a raw image for reading and fills *geom from its size; returns NULL and sets *err on failure */ + +static FILE* mvhd_open_existing_raw_img(const char* utf8_raw_path, MVHDGeom* geom, int* err) { + FILE *raw_img = mvhd_fopen(utf8_raw_path, "rb", err); + if (raw_img == NULL) { + *err = MVHD_ERR_FILE; + return NULL; + } + if (geom == NULL) { + fclose(raw_img); /* do not leak the handle when bailing out */ + *err = MVHD_ERR_INVALID_GEOM; + return NULL; + } + mvhd_fseeko64(raw_img, 0, SEEK_END); /* seek to the end so the file position gives the image size */ + uint64_t size_bytes = (uint64_t)mvhd_ftello64(raw_img); + MVHDGeom new_geom = mvhd_calculate_geometry(size_bytes); + if (mvhd_calc_size_bytes(&new_geom) != size_bytes) { /* the image size must be exactly representable as a CHS geometry */ + fclose(raw_img); /* do not leak the handle when bailing out */ + *err = MVHD_ERR_CONV_SIZE; + return NULL; + } + geom->cyl = new_geom.cyl; + geom->heads = new_geom.heads; + geom->spt = new_geom.spt; + mvhd_fseeko64(raw_img, 0, SEEK_SET); /* rewind so callers read from the start */ + return raw_img; +} + +MVHDMeta* mvhd_convert_to_vhd_fixed(const char* utf8_raw_path, const char* utf8_vhd_path, int* err) { + MVHDGeom geom; + FILE *raw_img = mvhd_open_existing_raw_img(utf8_raw_path, &geom, err); + if (raw_img == NULL) { + return NULL; + } + uint64_t size_in_bytes = mvhd_calc_size_bytes(&geom); + MVHDMeta *vhdm = mvhd_create_fixed_raw(utf8_vhd_path, raw_img, size_in_bytes, &geom, err, NULL); /* NOTE(review): ownership of raw_img after this call is not visible here - confirm it is closed by the callee */ + if (vhdm == NULL) { + return NULL; + } + return vhdm; +} +MVHDMeta* 
mvhd_convert_to_vhd_sparse(const char* utf8_raw_path, const char* utf8_vhd_path, int* err) { + MVHDGeom geom; + MVHDMeta *vhdm = NULL; + FILE *raw_img = mvhd_open_existing_raw_img(utf8_raw_path, &geom, err); + if (raw_img == NULL) { + return NULL; + } + vhdm = mvhd_create_sparse(utf8_vhd_path, geom, err); + if (vhdm == NULL) { + goto end; + } + uint8_t buff[4096] = {0}; /* 8 sectors */ + uint8_t empty_buff[4096] = {0}; + int total_sectors = mvhd_calc_size_sectors(&geom); + int copy_sect = 0; + for (int i = 0; i < total_sectors; i += 8) { + copy_sect = 8; + if ((i + 8) >= total_sectors) { + copy_sect = total_sectors - i; + memset(buff, 0, sizeof buff); /* a short final chunk must not compare against stale data */ + } + fread(buff, MVHD_SECTOR_SIZE, copy_sect, raw_img); + /* Only write data if there's data to write, to take advantage of the sparse VHD format */ + if (memcmp(buff, empty_buff, sizeof buff) != 0) { + mvhd_write_sectors(vhdm, i, copy_sect, buff); + } + } +end: + fclose(raw_img); /* reached on success and on VHD creation failure alike */ + return vhdm; +} +FILE* mvhd_convert_to_raw(const char* utf8_vhd_path, const char* utf8_raw_path, int *err) { + FILE *raw_img = mvhd_fopen(utf8_raw_path, "wb", err); + if (raw_img == NULL) { + return NULL; + } + MVHDMeta *vhdm = mvhd_open(utf8_vhd_path, true, err); + if (vhdm == NULL) { + fclose(raw_img); + return NULL; + } + uint8_t buff[4096] = {0}; /* 8 sectors */ + int total_sectors = mvhd_calc_size_sectors((MVHDGeom*)&vhdm->footer.geom); + int copy_sect = 0; + for (int i = 0; i < total_sectors; i += 8) { + copy_sect = 8; + if ((i + 8) >= total_sectors) { + copy_sect = total_sectors - i; + } + mvhd_read_sectors(vhdm, i, copy_sect, buff); + fwrite(buff, MVHD_SECTOR_SIZE, copy_sect, raw_img); + } + mvhd_close(vhdm); + mvhd_fseeko64(raw_img, 0, SEEK_SET); /* hand the file back rewound to the start */ + return raw_img; +} \ No newline at end of file diff --git a/src/disk/minivhd/minivhd_create.c b/src/disk/minivhd/minivhd_create.c new file mode 100644 index 000000000..9e34ece9d --- /dev/null +++ b/src/disk/minivhd/minivhd_create.c @@ -0,0 +1,482 @@ +#include +#include +#include +#include 
+#include +#include "cwalk.h" +#include "libxml2_encoding.h" +#include "minivhd_internal.h" +#include "minivhd_util.h" +#include "minivhd_struct_rw.h" +#include "minivhd_io.h" +#include "minivhd_create.h" +#include "minivhd.h" + +static void mvhd_gen_footer(MVHDFooter* footer, uint64_t size_in_bytes, MVHDGeom* geom, MVHDType type, uint64_t sparse_header_off); +static void mvhd_gen_sparse_header(MVHDSparseHeader* header, uint32_t num_blks, uint64_t bat_offset, uint32_t block_size_in_sectors); +static int mvhd_gen_par_loc(MVHDSparseHeader* header, + const char* child_path, + const char* par_path, + uint64_t start_offset, + mvhd_utf16* w2ku_path_buff, + mvhd_utf16* w2ru_path_buff, + MVHDError* err); +static MVHDMeta* mvhd_create_sparse_diff(const char* path, const char* par_path, uint64_t size_in_bytes, MVHDGeom* geom, uint32_t block_size_in_sectors, int* err); + +/** + * \brief Populate a VHD footer + * + * \param [out] footer the footer structure to populate + * \param [in] size_in_bytes is the total size of the virtual hard disk in bytes + * \param [in] geom the geometry to store in the footer + * \param [in] type of VHD that is being created + * \param [in] sparse_header_off an absolute file offset to the sparse header. Not used for fixed VHD images + */ +static void mvhd_gen_footer(MVHDFooter* footer, uint64_t size_in_bytes, MVHDGeom* geom, MVHDType type, uint64_t sparse_header_off) { + memcpy(footer->cookie, "conectix", sizeof footer->cookie); + footer->features = 0x00000002; + footer->fi_fmt_vers = 0x00010000; + footer->data_offset = (type == MVHD_TYPE_DIFF || type == MVHD_TYPE_DYNAMIC) ? 
sparse_header_off : 0xffffffffffffffff; /* fixed images have no sparse header, so the offset is all ones */ + footer->timestamp = vhd_calc_timestamp(); + memcpy(footer->cr_app, "mvhd", sizeof footer->cr_app); + footer->cr_vers = 0x000e0000; + memcpy(footer->cr_host_os, "Wi2k", sizeof footer->cr_host_os); + footer->orig_sz = footer->curr_sz = size_in_bytes; + footer->geom.cyl = geom->cyl; + footer->geom.heads = geom->heads; + footer->geom.spt = geom->spt; + footer->disk_type = type; + mvhd_generate_uuid(footer->uuid); + footer->checksum = mvhd_gen_footer_checksum(footer); /* computed last, once every other field is set */ +} + +/** + * \brief Populate a VHD sparse header + * + * \param [out] header the sparse header to populate; used for sparse and differencing images + * \param [in] num_blks is the number of data blocks that the image contains + * \param [in] bat_offset is the absolute file offset for start of the Block Allocation Table + * \param [in] block_size_in_sectors is the block size in sectors. + */ +static void mvhd_gen_sparse_header(MVHDSparseHeader* header, uint32_t num_blks, uint64_t bat_offset, uint32_t block_size_in_sectors) { + memcpy(header->cookie, "cxsparse", sizeof header->cookie); + header->data_offset = 0xffffffffffffffff; + header->bat_offset = bat_offset; + header->head_vers = 0x00010000; + header->max_bat_ent = num_blks; + header->block_sz = block_size_in_sectors * (uint32_t)MVHD_SECTOR_SIZE; /* stored in bytes, not sectors */ + header->checksum = mvhd_gen_sparse_checksum(header); /* computed last, once every other field is set */ +} + +/** + * \brief Generate parent locators for differencing VHD images + * + * \param [in] header the sparse header to populate with parent locator entries + * \param [in] child_path is the full path to the VHD being created + * \param [in] par_path is the full path to the parent image + * \param [in] start_offset is the absolute file offset from where to start storing the entries themselves. Must be sector aligned. 
+ * \param [out] w2ku_path_buff is a buffer containing the full path to the parent, encoded as UTF16-LE + * \param [out] w2ru_path_buff is a buffer containing the relative path to the parent, encoded as UTF16-LE + * \param [out] err indicates what error occurred, if any + * + * \retval 0 if success + * \retval < 0 if an error occurrs. Check value of *err for actual error + */ +static int mvhd_gen_par_loc(MVHDSparseHeader* header, + const char* child_path, + const char* par_path, + uint64_t start_offset, + mvhd_utf16* w2ku_path_buff, + mvhd_utf16* w2ru_path_buff, + MVHDError* err) { + /* Get our paths to store in the differencing VHD. We want both the absolute path to the parent, + as well as the relative path from the child VHD */ + int rv = 0; + char* par_filename; + size_t par_fn_len; + char rel_path[MVHD_MAX_PATH_BYTES] = {0}; + char child_dir[MVHD_MAX_PATH_BYTES] = {0}; + size_t child_dir_len; + if (strlen(child_path) < sizeof child_dir) { + strcpy(child_dir, child_path); + } else { + *err = MVHD_ERR_PATH_LEN; + rv = -1; + goto end; + } + cwk_path_get_basename(par_path, (const char**)&par_filename, &par_fn_len); + cwk_path_get_dirname(child_dir, &child_dir_len); + child_dir[child_dir_len] = '\0'; + size_t rel_len = cwk_path_get_relative(child_dir, par_path, rel_path, sizeof rel_path); + if (rel_len > sizeof rel_path) { + *err = MVHD_ERR_PATH_LEN; + rv = -1; + goto end; + } + /* We have our paths, now store the parent filename directly in the sparse header. 
*/ + int outlen = sizeof header->par_utf16_name; + int utf_ret; + utf_ret = UTF8ToUTF16BE((unsigned char*)header->par_utf16_name, &outlen, (const unsigned char*)par_filename, (int*)&par_fn_len); + if (utf_ret < 0) { + mvhd_set_encoding_err(utf_ret, (int*)err); + rv = -1; + goto end; + } + + /* And encode the paths to UTF16-LE */ + size_t par_path_len = strlen(par_path); + outlen = sizeof *w2ku_path_buff * MVHD_MAX_PATH_CHARS; + utf_ret = UTF8ToUTF16LE((unsigned char*)w2ku_path_buff, &outlen, (const unsigned char*)par_path, (int*)&par_path_len); + if (utf_ret < 0) { + mvhd_set_encoding_err(utf_ret, (int*)err); + rv = -1; + goto end; + } + int w2ku_len = utf_ret; + outlen = sizeof *w2ru_path_buff * MVHD_MAX_PATH_CHARS; + utf_ret = UTF8ToUTF16LE((unsigned char*)w2ru_path_buff, &outlen, (const unsigned char*)rel_path, (int*)&rel_len); + if (utf_ret < 0) { + mvhd_set_encoding_err(utf_ret, (int*)err); + rv = -1; + goto end; + } + int w2ru_len = utf_ret; + /** + * Finally populate the parent locaters in the sparse header. + * This is the information needed to find the paths saved elsewhere + * in the VHD image + */ + + /* Note about the plat_data_space field: The VHD spec says this field stores the number of sectors needed to store the locator path. + * However, Hyper-V and VPC store the number of bytes, not the number of sectors, and will refuse to open VHDs which have the + * number of sectors in this field. 
+ * See https://stackoverflow.com/questions/40760181/mistake-in-virtual-hard-disk-image-format-specification + */ + header->par_loc_entry[0].plat_code = MVHD_DIF_LOC_W2KU; + header->par_loc_entry[0].plat_data_len = (uint32_t)w2ku_len; + header->par_loc_entry[0].plat_data_offset = (uint64_t)start_offset; + header->par_loc_entry[0].plat_data_space = ((header->par_loc_entry[0].plat_data_len / MVHD_SECTOR_SIZE) + 1) * MVHD_SECTOR_SIZE; + header->par_loc_entry[1].plat_code = MVHD_DIF_LOC_W2RU; + header->par_loc_entry[1].plat_data_len = (uint32_t)w2ru_len; + header->par_loc_entry[1].plat_data_offset = (uint64_t)start_offset + ((uint64_t)header->par_loc_entry[0].plat_data_space); + header->par_loc_entry[1].plat_data_space = ((header->par_loc_entry[1].plat_data_len / MVHD_SECTOR_SIZE) + 1) * MVHD_SECTOR_SIZE; + goto end; + +end: + return rv; +} + +MVHDMeta* mvhd_create_fixed(const char* path, MVHDGeom geom, int* err, mvhd_progress_callback progress_callback) { + uint64_t size_in_bytes = mvhd_calc_size_bytes(&geom); + return mvhd_create_fixed_raw(path, NULL, size_in_bytes, &geom, err, progress_callback); +} + +/** + * \brief internal function that implements public mvhd_create_fixed() functionality + * + * Contains one more parameter than the public function, to allow using an existing + * raw disk image as the data source for the new fixed VHD. 
+ * + * \param [in] raw_image file handle to a raw disk image to populate VHD + */ +MVHDMeta* mvhd_create_fixed_raw(const char* path, FILE* raw_img, uint64_t size_in_bytes, MVHDGeom* geom, int* err, mvhd_progress_callback progress_callback) { + uint8_t img_data[MVHD_SECTOR_SIZE] = {0}; + uint8_t footer_buff[MVHD_FOOTER_SIZE] = {0}; + MVHDMeta* vhdm = calloc(1, sizeof *vhdm); + if (vhdm == NULL) { + *err = MVHD_ERR_MEM; + goto end; + } + if (geom == NULL || (geom->cyl == 0 || geom->heads == 0 || geom->spt == 0)) { + *err = MVHD_ERR_INVALID_GEOM; + goto cleanup_vhdm; + } + FILE* f = mvhd_fopen(path, "wb+", err); + if (f == NULL) { + goto cleanup_vhdm; + } + mvhd_fseeko64(f, 0, SEEK_SET); + uint32_t size_sectors = (uint32_t)(size_in_bytes / MVHD_SECTOR_SIZE); + uint32_t s; + if (progress_callback) + progress_callback(0, size_sectors); + if (raw_img != NULL) { + mvhd_fseeko64(raw_img, 0, SEEK_END); + uint64_t raw_size = (uint64_t)mvhd_ftello64(raw_img); + MVHDGeom raw_geom = mvhd_calculate_geometry(raw_size); + if (mvhd_calc_size_bytes(&raw_geom) != raw_size) { + *err = MVHD_ERR_CONV_SIZE; + goto cleanup_vhdm; + } + mvhd_gen_footer(&vhdm->footer, raw_size, geom, MVHD_TYPE_FIXED, 0); + mvhd_fseeko64(raw_img, 0, SEEK_SET); + for (s = 0; s < size_sectors; s++) { + fread(img_data, sizeof img_data, 1, raw_img); + fwrite(img_data, sizeof img_data, 1, f); + if (progress_callback) + progress_callback(s + 1, size_sectors); + } + } else { + mvhd_gen_footer(&vhdm->footer, size_in_bytes, geom, MVHD_TYPE_FIXED, 0); + for (s = 0; s < size_sectors; s++) { + fwrite(img_data, sizeof img_data, 1, f); + if (progress_callback) + progress_callback(s + 1, size_sectors); + } + } + mvhd_footer_to_buffer(&vhdm->footer, footer_buff); + fwrite(footer_buff, sizeof footer_buff, 1, f); + fclose(f); + f = NULL; + free(vhdm); + vhdm = mvhd_open(path, false, err); + goto end; + +cleanup_vhdm: + free(vhdm); + vhdm = NULL; +end: + return vhdm; +} + +/** + * \brief Create sparse or differencing VHD 
image. + * + * \param [in] path is the absolute path to the VHD file to create + * \param [in] par_path is the absolute path to a parent image. If NULL, a sparse image is created, otherwise create a differencing image + * \param [in] size_in_bytes is the total size in bytes of the virtual hard disk image + * \param [in] geom is the HDD geometry of the image to create. Determines final image size + * \param [in] block_size_in_sectors is the block size in sectors + * \param [out] err indicates what error occurred, if any + * + * \return NULL if an error occurrs. Check value of *err for actual error. Otherwise returns pointer to a MVHDMeta struct + */ +static MVHDMeta* mvhd_create_sparse_diff(const char* path, const char* par_path, uint64_t size_in_bytes, MVHDGeom* geom, uint32_t block_size_in_sectors, int* err) { + uint8_t footer_buff[MVHD_FOOTER_SIZE] = {0}; + uint8_t sparse_buff[MVHD_SPARSE_SIZE] = {0}; + uint8_t bat_sect[MVHD_SECTOR_SIZE]; + MVHDGeom par_geom = {0}; + memset(bat_sect, 0xffffffff, sizeof bat_sect); + MVHDMeta* vhdm = NULL; + MVHDMeta* par_vhdm = NULL; + mvhd_utf16* w2ku_path_buff = NULL; + mvhd_utf16* w2ru_path_buff = NULL; + uint32_t par_mod_timestamp = 0; + if (par_path != NULL) { + par_mod_timestamp = mvhd_file_mod_timestamp(par_path, err); + if (*err != 0) { + goto end; + } + par_vhdm = mvhd_open(par_path, true, err); + if (par_vhdm == NULL) { + goto end; + } + } + vhdm = calloc(1, sizeof *vhdm); + if (vhdm == NULL) { + *err = MVHD_ERR_MEM; + goto cleanup_par_vhdm; + } + if (par_vhdm != NULL) { + /* We use the geometry from the parent VHD, not what was passed in */ + par_geom.cyl = par_vhdm->footer.geom.cyl; + par_geom.heads = par_vhdm->footer.geom.heads; + par_geom.spt = par_vhdm->footer.geom.spt; + geom = &par_geom; + size_in_bytes = par_vhdm->footer.curr_sz; + } else if (geom == NULL || (geom->cyl == 0 || geom->heads == 0 || geom->spt == 0)) { + *err = MVHD_ERR_INVALID_GEOM; + goto cleanup_vhdm; + } + + FILE* f = mvhd_fopen(path, "wb+", 
err); + if (f == NULL) { + goto cleanup_vhdm; + } + mvhd_fseeko64(f, 0, SEEK_SET); + /* Note, the sparse header follows the footer copy at the beginning of the file */ + if (par_path == NULL) { + mvhd_gen_footer(&vhdm->footer, size_in_bytes, geom, MVHD_TYPE_DYNAMIC, MVHD_FOOTER_SIZE); + } else { + mvhd_gen_footer(&vhdm->footer, size_in_bytes, geom, MVHD_TYPE_DIFF, MVHD_FOOTER_SIZE); + } + mvhd_footer_to_buffer(&vhdm->footer, footer_buff); + /* As mentioned, start with a copy of the footer */ + fwrite(footer_buff, sizeof footer_buff, 1, f); + /** + * Calculate the number of (2MB or 512KB) data blocks required to store the entire + * contents of the disk image, followed by the number of sectors the + * BAT occupies in the image. Note, the BAT is sector aligned, and is padded + * to the next sector boundary + * */ + uint32_t size_in_sectors = (uint32_t)(size_in_bytes / MVHD_SECTOR_SIZE); + uint32_t num_blks = size_in_sectors / block_size_in_sectors; + if (size_in_sectors % block_size_in_sectors != 0) { + num_blks += 1; + } + uint32_t num_bat_sect = num_blks / MVHD_BAT_ENT_PER_SECT; + if (num_blks % MVHD_BAT_ENT_PER_SECT != 0) { + num_bat_sect += 1; + } + /* Storing the BAT directly following the footer and header */ + uint64_t bat_offset = MVHD_FOOTER_SIZE + MVHD_SPARSE_SIZE; + uint64_t par_loc_offset = 0; + + /** + * If creating a differencing VHD, populate the sparse header with additional + * data about the parent image, and where to find it, and it's last modified timestamp + * */ + if (par_vhdm != NULL) { + /** + * Create output buffers to encode paths into. 
+ * The paths are not stored directly in the sparse header, hence the need to + * store them in buffers to be written to the VHD image later + */ + w2ku_path_buff = calloc(MVHD_MAX_PATH_CHARS, sizeof * w2ku_path_buff); + if (w2ku_path_buff == NULL) { + *err = MVHD_ERR_MEM; + goto end; + } + w2ru_path_buff = calloc(MVHD_MAX_PATH_CHARS, sizeof * w2ru_path_buff); + if (w2ru_path_buff == NULL) { + *err = MVHD_ERR_MEM; + goto end; + } + memcpy(vhdm->sparse.par_uuid, par_vhdm->footer.uuid, sizeof vhdm->sparse.par_uuid); + par_loc_offset = bat_offset + ((uint64_t)num_bat_sect * MVHD_SECTOR_SIZE) + (5 * MVHD_SECTOR_SIZE); + if (mvhd_gen_par_loc(&vhdm->sparse, path, par_path, par_loc_offset, w2ku_path_buff, w2ru_path_buff, (MVHDError*)err) < 0) { + goto cleanup_vhdm; + } + vhdm->sparse.par_timestamp = par_mod_timestamp; + } + mvhd_gen_sparse_header(&vhdm->sparse, num_blks, bat_offset, block_size_in_sectors); + mvhd_header_to_buffer(&vhdm->sparse, sparse_buff); + fwrite(sparse_buff, sizeof sparse_buff, 1, f); + /* The BAT sectors need to be filled with 0xffffffff */ + for (uint32_t i = 0; i < num_bat_sect; i++) { + fwrite(bat_sect, sizeof bat_sect, 1, f); + } + mvhd_write_empty_sectors(f, 5); + /** + * If creating a differencing VHD, the paths to the parent image need to be written + * tp the file. Both absolute and relative paths are written + * */ + if (par_vhdm != NULL) { + uint64_t curr_pos = (uint64_t)mvhd_ftello64(f); + /* Double check my sums... 
*/ + assert(curr_pos == par_loc_offset); + /* Fill the space required for location data with zero */ + uint8_t empty_sect[MVHD_SECTOR_SIZE] = {0}; + for (int i = 0; i < 2; i++) { + for (uint32_t j = 0; j < (vhdm->sparse.par_loc_entry[i].plat_data_space / MVHD_SECTOR_SIZE); j++) { + fwrite(empty_sect, sizeof empty_sect, 1, f); + } + } + /* Now write the location entries */ + mvhd_fseeko64(f, vhdm->sparse.par_loc_entry[0].plat_data_offset, SEEK_SET); + fwrite(w2ku_path_buff, vhdm->sparse.par_loc_entry[0].plat_data_len, 1, f); + mvhd_fseeko64(f, vhdm->sparse.par_loc_entry[1].plat_data_offset, SEEK_SET); + fwrite(w2ru_path_buff, vhdm->sparse.par_loc_entry[1].plat_data_len, 1, f); + /* and reset the file position to continue */ + mvhd_fseeko64(f, vhdm->sparse.par_loc_entry[1].plat_data_offset + vhdm->sparse.par_loc_entry[1].plat_data_space, SEEK_SET); + mvhd_write_empty_sectors(f, 5); + } + /* And finish with the footer */ + fwrite(footer_buff, sizeof footer_buff, 1, f); + fclose(f); + f = NULL; + free(vhdm); + vhdm = mvhd_open(path, false, err); + goto end; + +cleanup_vhdm: + free(vhdm); + vhdm = NULL; +cleanup_par_vhdm: + if (par_vhdm != NULL) { + mvhd_close(par_vhdm); + } +end: + free(w2ku_path_buff); + free(w2ru_path_buff); + return vhdm; +} + +MVHDMeta* mvhd_create_sparse(const char* path, MVHDGeom geom, int* err) { + uint64_t size_in_bytes = mvhd_calc_size_bytes(&geom); + return mvhd_create_sparse_diff(path, NULL, size_in_bytes, &geom, MVHD_BLOCK_LARGE, err); +} + +MVHDMeta* mvhd_create_diff(const char* path, const char* par_path, int* err) { + return mvhd_create_sparse_diff(path, par_path, 0, NULL, MVHD_BLOCK_LARGE, err); +} + +MVHDMeta* mvhd_create_ex(MVHDCreationOptions options, int* err) { + uint32_t geom_sector_size; + switch (options.type) + { + case MVHD_TYPE_FIXED: + case MVHD_TYPE_DYNAMIC: + geom_sector_size = mvhd_calc_size_sectors(&(options.geometry)); + if ((options.size_in_bytes > 0 && (options.size_in_bytes % MVHD_SECTOR_SIZE) > 0) + || 
(options.size_in_bytes > MVHD_MAX_SIZE_IN_BYTES) + || (options.size_in_bytes == 0 && geom_sector_size == 0)) + { + *err = MVHD_ERR_INVALID_SIZE; + return NULL; + } + + if (options.size_in_bytes > 0 && ((uint64_t)geom_sector_size * MVHD_SECTOR_SIZE) > options.size_in_bytes) + { + *err = MVHD_ERR_INVALID_GEOM; + return NULL; + } + + if (options.size_in_bytes == 0) + options.size_in_bytes = (uint64_t)geom_sector_size * MVHD_SECTOR_SIZE; + + if (geom_sector_size == 0) + options.geometry = mvhd_calculate_geometry(options.size_in_bytes); + break; + case MVHD_TYPE_DIFF: + if (options.parent_path == NULL) + { + *err = MVHD_ERR_FILE; + return NULL; + } + break; + default: + *err = MVHD_ERR_TYPE; + return NULL; + } + + if (options.path == NULL) + { + *err = MVHD_ERR_FILE; + return NULL; + } + + if (options.type != MVHD_TYPE_FIXED) + { + if (options.block_size_in_sectors == MVHD_BLOCK_DEFAULT) + options.block_size_in_sectors = MVHD_BLOCK_LARGE; + + if (options.block_size_in_sectors != MVHD_BLOCK_LARGE && options.block_size_in_sectors != MVHD_BLOCK_SMALL) + { + *err = MVHD_ERR_INVALID_BLOCK_SIZE; + return NULL; + } + } + + switch (options.type) + { + case MVHD_TYPE_FIXED: + return mvhd_create_fixed_raw(options.path, NULL, options.size_in_bytes, &(options.geometry), err, options.progress_callback); + case MVHD_TYPE_DYNAMIC: + return mvhd_create_sparse_diff(options.path, NULL, options.size_in_bytes, &(options.geometry), options.block_size_in_sectors, err); + case MVHD_TYPE_DIFF: + return mvhd_create_sparse_diff(options.path, options.parent_path, 0, NULL, options.block_size_in_sectors, err); + } + + return NULL; /* Make the compiler happy */ +} \ No newline at end of file diff --git a/src/disk/minivhd/minivhd_create.h b/src/disk/minivhd/minivhd_create.h new file mode 100644 index 000000000..9840d19ff --- /dev/null +++ b/src/disk/minivhd/minivhd_create.h @@ -0,0 +1,8 @@ +#ifndef MINIVHD_CREATE_H +#define MINIVHD_CREATE_H +#include +#include "minivhd.h" + +MVHDMeta* 
mvhd_create_fixed_raw(const char* path, FILE* raw_img, uint64_t size_in_bytes, MVHDGeom* geom, int* err, mvhd_progress_callback progress_callback); + +#endif \ No newline at end of file diff --git a/src/disk/minivhd/minivhd_internal.h b/src/disk/minivhd/minivhd_internal.h new file mode 100644 index 000000000..ea75284f8 --- /dev/null +++ b/src/disk/minivhd/minivhd_internal.h @@ -0,0 +1,96 @@ +#ifndef MINIVHD_INTERNAL_H +#define MINIVHD_INTERNAL_H +#include +#include +#include + +#define MVHD_FOOTER_SIZE 512 +#define MVHD_SPARSE_SIZE 1024 + +#define MVHD_SECTOR_SIZE 512 +#define MVHD_BAT_ENT_PER_SECT 128 + +#define MVHD_MAX_SIZE_IN_BYTES 0x1fe00000000 + +#define MVHD_SPARSE_BLK 0xffffffff +/* For simplicity, we don't handle paths longer than this + * Note, this is the max path in characters, as that is what + * Windows uses + */ +#define MVHD_MAX_PATH_CHARS 260 +#define MVHD_MAX_PATH_BYTES 1040 + +#define MVHD_DIF_LOC_W2RU 0x57327275 +#define MVHD_DIF_LOC_W2KU 0x57326B75 + +typedef struct MVHDSectorBitmap { + uint8_t* curr_bitmap; + int sector_count; + int curr_block; +} MVHDSectorBitmap; + +typedef struct MVHDFooter { + uint8_t cookie[8]; + uint32_t features; + uint32_t fi_fmt_vers; + uint64_t data_offset; + uint32_t timestamp; + uint8_t cr_app[4]; + uint32_t cr_vers; + uint8_t cr_host_os[4]; + uint64_t orig_sz; + uint64_t curr_sz; + struct { + uint16_t cyl; + uint8_t heads; + uint8_t spt; + } geom; + uint32_t disk_type; + uint32_t checksum; + uint8_t uuid[16]; + uint8_t saved_st; + uint8_t reserved[427]; +} MVHDFooter; + +typedef struct MVHDSparseHeader { + uint8_t cookie[8]; + uint64_t data_offset; + uint64_t bat_offset; + uint32_t head_vers; + uint32_t max_bat_ent; + uint32_t block_sz; + uint32_t checksum; + uint8_t par_uuid[16]; + uint32_t par_timestamp; + uint32_t reserved_1; + uint8_t par_utf16_name[512]; + struct { + uint32_t plat_code; + uint32_t plat_data_space; + uint32_t plat_data_len; + uint32_t reserved; + uint64_t plat_data_offset; + } 
par_loc_entry[8]; + uint8_t reserved_2[256]; +} MVHDSparseHeader; + +typedef struct MVHDMeta MVHDMeta; +struct MVHDMeta { + FILE* f; + bool readonly; + char filename[MVHD_MAX_PATH_BYTES]; + struct MVHDMeta* parent; + MVHDFooter footer; + MVHDSparseHeader sparse; + uint32_t* block_offset; + int sect_per_block; + MVHDSectorBitmap bitmap; + int (*read_sectors)(MVHDMeta*, uint32_t, int, void*); + int (*write_sectors)(MVHDMeta*, uint32_t, int, void*); + struct { + uint8_t* zero_data; + int sector_count; + } format_buffer; +}; + +#endif \ No newline at end of file diff --git a/src/disk/minivhd/minivhd_io.c b/src/disk/minivhd/minivhd_io.c new file mode 100644 index 000000000..8e9172e63 --- /dev/null +++ b/src/disk/minivhd/minivhd_io.c @@ -0,0 +1,276 @@ +/** + * \file + * \brief Sector reading and writing implementations + */ + +#include +#include +#include "minivhd_internal.h" +#include "minivhd_util.h" + +/* The following bit array macros adapted from + http://www.mathcs.emory.edu/~cheung/Courses/255/Syllabus/1-C-intro/bit-array.html */ + +#define VHD_SETBIT(A,k) ( A[(k/8)] |= (0x80 >> (k%8)) ) +#define VHD_CLEARBIT(A,k) ( A[(k/8)] &= ~(0x80 >> (k%8)) ) +#define VHD_TESTBIT(A,k) ( A[(k/8)] & (0x80 >> (k%8)) ) + +static inline void mvhd_check_sectors(uint32_t offset, int num_sectors, uint32_t total_sectors, int* transfer_sect, int* trunc_sect); +static void mvhd_read_sect_bitmap(MVHDMeta* vhdm, int blk); +static void mvhd_write_bat_entry(MVHDMeta* vhdm, int blk); +static void mvhd_create_block(MVHDMeta* vhdm, int blk); +static void mvhd_write_curr_sect_bitmap(MVHDMeta* vhdm); + +/** + * \brief Check that we will not be overflowing buffers + * + * \param [in] offset The offset from which we are beginning from + * \param [in] num_sectors The number of sectors which we desire to read/write + * \param [in] total_sectors The total number of sectors available + * \param [out] transfer_sect The number of sectors to actually write. 
+ * This may be lower than num_sectors if offset + num_sectors >= total_sectors + * \param [out] trunc_sectors The number of sectors truncated if transfer_sectors < num_sectors + */ +static inline void mvhd_check_sectors(uint32_t offset, int num_sectors, uint32_t total_sectors, int* transfer_sect, int* trunc_sect) { + *transfer_sect = num_sectors; + *trunc_sect = 0; + if ((total_sectors - offset) < (uint32_t)*transfer_sect) { + *transfer_sect = total_sectors - offset; + *trunc_sect = num_sectors - *transfer_sect; + } +} + +void mvhd_write_empty_sectors(FILE* f, int sector_count) { + uint8_t zero_bytes[MVHD_SECTOR_SIZE] = {0}; + for (int i = 0; i < sector_count; i++) { + fwrite(zero_bytes, sizeof zero_bytes, 1, f); + } +} + +/** + * \brief Read the sector bitmap for a block. + * + * If the block is sparse, the sector bitmap in memory will be + * zeroed. Otherwise, the sector bitmap is read from the VHD file. + * + * \param [in] vhdm MiniVHD data structure + * \param [in] blk The block for which to read the sector bitmap from + */ +static void mvhd_read_sect_bitmap(MVHDMeta* vhdm, int blk) { + if (vhdm->block_offset[blk] != MVHD_SPARSE_BLK) { + mvhd_fseeko64(vhdm->f, (uint64_t)vhdm->block_offset[blk] * MVHD_SECTOR_SIZE, SEEK_SET); + fread(vhdm->bitmap.curr_bitmap, vhdm->bitmap.sector_count * MVHD_SECTOR_SIZE, 1, vhdm->f); + } else { + memset(vhdm->bitmap.curr_bitmap, 0, vhdm->bitmap.sector_count * MVHD_SECTOR_SIZE); + } + vhdm->bitmap.curr_block = blk; +} + +/** + * \brief Write the current sector bitmap in memory to file + * + * \param [in] vhdm MiniVHD data structure + */ +static void mvhd_write_curr_sect_bitmap(MVHDMeta* vhdm) { + if (vhdm->bitmap.curr_block >= 0) { + int64_t abs_offset = (int64_t)vhdm->block_offset[vhdm->bitmap.curr_block] * MVHD_SECTOR_SIZE; + mvhd_fseeko64(vhdm->f, abs_offset, SEEK_SET); + fwrite(vhdm->bitmap.curr_bitmap, MVHD_SECTOR_SIZE, vhdm->bitmap.sector_count, vhdm->f); + } +} + +/** + * \brief Write block offset from memory into file + * 
+ * \param [in] vhdm MiniVHD data structure + * \param [in] blk The block for which to write the offset for + */ +static void mvhd_write_bat_entry(MVHDMeta* vhdm, int blk) { + uint64_t table_offset = vhdm->sparse.bat_offset + ((uint64_t)blk * sizeof *vhdm->block_offset); + uint32_t offset = mvhd_to_be32(vhdm->block_offset[blk]); + mvhd_fseeko64(vhdm->f, table_offset, SEEK_SET); + fwrite(&offset, sizeof offset, 1, vhdm->f); +} + +/** + * \brief Create an empty block in a sparse or differencing VHD image + * + * VHD images store data in blocks, which are typically 4096 sectors in size + * (~2MB). These blocks may be stored on disk in any order. Blocks are created + * on demand when required. + * + * This function creates new, empty blocks, by replacing the footer at the end of the file + * and then re-inserting the footer at the new file end. The BAT table entry for the + * new block is updated with the new offset. + * + * \param [in] vhdm MiniVHD data structure + * \param [in] blk The block number to create + */ +static void mvhd_create_block(MVHDMeta* vhdm, int blk) { + uint8_t footer[MVHD_FOOTER_SIZE]; + /* Seek to where the footer SHOULD be */ + mvhd_fseeko64(vhdm->f, -MVHD_FOOTER_SIZE, SEEK_END); + fread(footer, sizeof footer, 1, vhdm->f); + mvhd_fseeko64(vhdm->f, -MVHD_FOOTER_SIZE, SEEK_END); + if (!mvhd_is_conectix_str(footer)) { + /* Oh dear. We use the header instead, since something has gone wrong at the footer */ + mvhd_fseeko64(vhdm->f, 0, SEEK_SET); + fread(footer, sizeof footer, 1, vhdm->f); + mvhd_fseeko64(vhdm->f, 0, SEEK_END); + } + int64_t abs_offset = mvhd_ftello64(vhdm->f); + if (abs_offset % MVHD_SECTOR_SIZE != 0) { + /* Yikes! We're supposed to be on a sector boundary. 
Add some padding */ + int64_t padding_amount = (int64_t)MVHD_SECTOR_SIZE - (abs_offset % MVHD_SECTOR_SIZE); + uint8_t zero_byte = 0; + for (int i = 0; i < padding_amount; i++) { + fwrite(&zero_byte, sizeof zero_byte, 1, vhdm->f); + } + abs_offset += padding_amount; + } + uint32_t sect_offset = (uint32_t)(abs_offset / MVHD_SECTOR_SIZE); + int blk_size_sectors = vhdm->sparse.block_sz / MVHD_SECTOR_SIZE; + mvhd_write_empty_sectors(vhdm->f, vhdm->bitmap.sector_count + blk_size_sectors); + /* Add a bit of padding. That's what Windows appears to do, although it's not strictly necessary... */ + mvhd_write_empty_sectors(vhdm->f, 5); + /* And we finish with the footer */ + fwrite(footer, sizeof footer, 1, vhdm->f); + /* We no longer have a sparse block. Update that BAT! */ + vhdm->block_offset[blk] = sect_offset; + mvhd_write_bat_entry(vhdm, blk); +} + +int mvhd_fixed_read(MVHDMeta* vhdm, uint32_t offset, int num_sectors, void* out_buff) { + int64_t addr; + int transfer_sectors, truncated_sectors; + uint32_t total_sectors = (uint32_t)(vhdm->footer.curr_sz / MVHD_SECTOR_SIZE); + mvhd_check_sectors(offset, num_sectors, total_sectors, &transfer_sectors, &truncated_sectors); + addr = (int64_t)offset * MVHD_SECTOR_SIZE; + mvhd_fseeko64(vhdm->f, addr, SEEK_SET); + fread(out_buff, transfer_sectors*MVHD_SECTOR_SIZE, 1, vhdm->f); + return truncated_sectors; +} + +int mvhd_sparse_read(MVHDMeta* vhdm, uint32_t offset, int num_sectors, void* out_buff) { + int transfer_sectors, truncated_sectors; + uint32_t total_sectors = (uint32_t)(vhdm->footer.curr_sz / MVHD_SECTOR_SIZE); + mvhd_check_sectors(offset, num_sectors, total_sectors, &transfer_sectors, &truncated_sectors); + uint8_t* buff = (uint8_t*)out_buff; + int64_t addr; + uint32_t s, ls; + int blk, prev_blk, sib; + ls = offset + transfer_sectors; + prev_blk = -1; + for (s = offset; s < ls; s++) { + blk = s / vhdm->sect_per_block; + sib = s % vhdm->sect_per_block; + if (blk != prev_blk) { + prev_blk = blk; + if 
(vhdm->bitmap.curr_block != blk) { + mvhd_read_sect_bitmap(vhdm, blk); + mvhd_fseeko64(vhdm->f, (uint64_t)sib * MVHD_SECTOR_SIZE, SEEK_CUR); + } else { + addr = ((int64_t)vhdm->block_offset[blk] + vhdm->bitmap.sector_count + sib) * MVHD_SECTOR_SIZE; + mvhd_fseeko64(vhdm->f, addr, SEEK_SET); + } + } + if (VHD_TESTBIT(vhdm->bitmap.curr_bitmap, sib)) { + fread(buff, MVHD_SECTOR_SIZE, 1, vhdm->f); + } else { + memset(buff, 0, MVHD_SECTOR_SIZE); + mvhd_fseeko64(vhdm->f, MVHD_SECTOR_SIZE, SEEK_CUR); + } + buff += MVHD_SECTOR_SIZE; + } + return truncated_sectors; +} + +int mvhd_diff_read(MVHDMeta* vhdm, uint32_t offset, int num_sectors, void* out_buff) { + int transfer_sectors, truncated_sectors; + uint32_t total_sectors = (uint32_t)(vhdm->footer.curr_sz / MVHD_SECTOR_SIZE); + mvhd_check_sectors(offset, num_sectors, total_sectors, &transfer_sectors, &truncated_sectors); + uint8_t* buff = (uint8_t*)out_buff; + MVHDMeta* curr_vhdm = vhdm; + uint32_t s, ls; + int blk, sib; + ls = offset + transfer_sectors; + for (s = offset; s < ls; s++) { + while (curr_vhdm->footer.disk_type == MVHD_TYPE_DIFF) { + blk = s / curr_vhdm->sect_per_block; + sib = s % curr_vhdm->sect_per_block; + if (curr_vhdm->bitmap.curr_block != blk) { + mvhd_read_sect_bitmap(curr_vhdm, blk); + } + if (!VHD_TESTBIT(curr_vhdm->bitmap.curr_bitmap, sib)) { + curr_vhdm = curr_vhdm->parent; + } else { break; } + } + /* We handle actual sector reading using the fixed or sparse functions, + as a differencing VHD is also a sparse VHD */ + if (curr_vhdm->footer.disk_type == MVHD_TYPE_DIFF || curr_vhdm->footer.disk_type == MVHD_TYPE_DYNAMIC) { + mvhd_sparse_read(curr_vhdm, s, 1, buff); + } else { + mvhd_fixed_read(curr_vhdm, s, 1, buff); + } + curr_vhdm = vhdm; + buff += MVHD_SECTOR_SIZE; + } + return truncated_sectors; +} + +int mvhd_fixed_write(MVHDMeta* vhdm, uint32_t offset, int num_sectors, void* in_buff) { + int64_t addr; + int transfer_sectors, truncated_sectors; + uint32_t total_sectors = 
(uint32_t)(vhdm->footer.curr_sz / MVHD_SECTOR_SIZE); + mvhd_check_sectors(offset, num_sectors, total_sectors, &transfer_sectors, &truncated_sectors); + addr = (int64_t)offset * MVHD_SECTOR_SIZE; + mvhd_fseeko64(vhdm->f, addr, SEEK_SET); + fwrite(in_buff, transfer_sectors*MVHD_SECTOR_SIZE, 1, vhdm->f); + return truncated_sectors; +} + +int mvhd_sparse_diff_write(MVHDMeta* vhdm, uint32_t offset, int num_sectors, void* in_buff) { + int transfer_sectors, truncated_sectors; + uint32_t total_sectors = (uint32_t)(vhdm->footer.curr_sz / MVHD_SECTOR_SIZE); + mvhd_check_sectors(offset, num_sectors, total_sectors, &transfer_sectors, &truncated_sectors); + uint8_t* buff = (uint8_t*)in_buff; + int64_t addr; + uint32_t s, ls; + int blk, prev_blk, sib; + ls = offset + transfer_sectors; + prev_blk = -1; + for (s = offset; s < ls; s++) { + blk = s / vhdm->sect_per_block; + sib = s % vhdm->sect_per_block; + if (vhdm->block_offset[blk] == MVHD_SPARSE_BLK) { + /* "read" the sector bitmap first, before creating a new block, as the bitmap will be + zero either way */ + mvhd_read_sect_bitmap(vhdm, blk); + mvhd_create_block(vhdm, blk); + } + if (blk != prev_blk) { + if (vhdm->bitmap.curr_block != blk) { + if (prev_blk >= 0) { + /* Write the sector bitmap for the previous block, before we replace it. 
*/ + mvhd_write_curr_sect_bitmap(vhdm); + } + mvhd_read_sect_bitmap(vhdm, blk); + mvhd_fseeko64(vhdm->f, (uint64_t)sib * MVHD_SECTOR_SIZE, SEEK_CUR); + } else { + addr = ((int64_t)vhdm->block_offset[blk] + vhdm->bitmap.sector_count + sib) * MVHD_SECTOR_SIZE; + mvhd_fseeko64(vhdm->f, addr, SEEK_SET); + } + prev_blk = blk; + } + fwrite(buff, MVHD_SECTOR_SIZE, 1, vhdm->f); + VHD_SETBIT(vhdm->bitmap.curr_bitmap, sib); + buff += MVHD_SECTOR_SIZE; + } + /* And write the sector bitmap for the last block we visited to disk */ + mvhd_write_curr_sect_bitmap(vhdm); + return truncated_sectors; +} + +int mvhd_noop_write(MVHDMeta* vhdm, uint32_t offset, int num_sectors, void* in_buff) { + return 0; +} \ No newline at end of file diff --git a/src/disk/minivhd/minivhd_io.h b/src/disk/minivhd/minivhd_io.h new file mode 100644 index 000000000..cdbfa6d77 --- /dev/null +++ b/src/disk/minivhd/minivhd_io.h @@ -0,0 +1,132 @@ +#ifndef MINIVHD_IO_H +#define MINIVHD_IO_H +#include "minivhd.h" + +/** + * \brief Write zero filled sectors to file. + * + * Note, the caller should set the file position before calling this + * function for correct operation. + * + * \param [in] f File to write sectors to + * \param [in] sector_count The number of sectors to write + */ +void mvhd_write_empty_sectors(FILE* f, int sector_count); + +/** + * \brief Read a fixed VHD image + * + * Fixed VHD images are essentially raw image files with a footer tacked on + * the end. They are therefore straightforward to write + * + * \param [in] vhdm MiniVHD data structure + * \param [in] offset Sector offset to read from + * \param [in] num_sectors The desired number of sectors to read + * \param [out] out_buff An output buffer to store read sectors. Must be + * large enough to hold num_sectors worth of sectors. 
+ * + * \retval 0 num_sectors were read from file + * \retval >0 < num_sectors were read from file + */ +int mvhd_fixed_read(MVHDMeta* vhdm, uint32_t offset, int num_sectors, void* out_buff); + +/** + * \brief Read a sparse VHD image + * + * Sparse, or dynamic images are VHD images that grow as data is written to them. + * + * This function implements the logic to read sectors from the file, taking into + * account the fact that blocks may be stored on disk in any order, and that the + * read could cross block boundaries. + * + * \param [in] vhdm MiniVHD data structure + * \param [in] offset Sector offset to read from + * \param [in] num_sectors The desired number of sectors to read + * \param [out] out_buff An output buffer to store read sectors. Must be + * large enough to hold num_sectors worth of sectors. + * + * \retval 0 num_sectors were read from file + * \retval >0 < num_sectors were read from file + */ +int mvhd_sparse_read(MVHDMeta* vhdm, uint32_t offset, int num_sectors, void* out_buff); + +/** + * \brief Read a differencing VHD image + * + * Differencing images are a variant of a sparse image. They contain the grow-on-demand + * properties of sparse images, but also reference a parent image. Data is read from the + * child image only if it is newer than the data stored in the parent image. + * + * This function implements the logic to read sectors from the child, or a parent image. + * Differencing images may have a differencing image as a parent, creating a chain of images. + * There is no theoretical chain length limit, although I do not consider long chains to be + * advisable. Verifying the parent-child relationship is not very robust. + * + * \param [in] vhdm MiniVHD data structure + * \param [in] offset Sector offset to read from + * \param [in] num_sectors The desired number of sectors to read + * \param [out] out_buff An output buffer to store read sectors. Must be + * large enough to hold num_sectors worth of sectors. 
+ * + * \retval 0 num_sectors were read from file + * \retval >0 < num_sectors were read from file + */ +int mvhd_diff_read(MVHDMeta* vhdm, uint32_t offset, int num_sectors, void* out_buff); + +/** + * \brief Write to a fixed VHD image + * + * Fixed VHD images are essentially raw image files with a footer tacked on + * the end. They are therefore straightforward to write + * + * \param [in] vhdm MiniVHD data structure + * \param [in] offset Sector offset to write to + * \param [in] num_sectors The desired number of sectors to write + * \param [in] in_buff A source buffer to write sectors from. Must be + * large enough to hold num_sectors worth of sectors. + * + * \retval 0 num_sectors were written to file + * \retval >0 < num_sectors were written to file + */ +int mvhd_fixed_write(MVHDMeta* vhdm, uint32_t offset, int num_sectors, void* in_buff); + +/** + * \brief Write to a sparse or differencing VHD image + * + * Sparse, or dynamic images are VHD images that grow as data is written to them. + * + * Differencing images are a variant of a sparse image. They contain the grow-on-demand + * properties of sparse images, but also reference a parent image. Data is always written + * to the child image. This makes writing to differencing images essentially identical to + * writing to sparse images, hence they use the same function. + * + * This function implements the logic to write sectors to the file, taking into + * account the fact that blocks may be stored on disk in any order, and that the + * write operation could cross block boundaries. + * + * \param [in] vhdm MiniVHD data structure + * \param [in] offset Sector offset to write to + * \param [in] num_sectors The desired number of sectors to write + * \param [in] in_buff A source buffer to write sectors from. Must be + * large enough to hold num_sectors worth of sectors. 
+ * + * \retval 0 num_sectors were written to file + * \retval >0 < num_sectors were written to file + */ +int mvhd_sparse_diff_write(MVHDMeta* vhdm, uint32_t offset, int num_sectors, void* in_buff); + +/** + * \brief A no-op function to "write" to read-only VHD images + * + * \param [in] vhdm MiniVHD data structure + * \param [in] offset Sector offset to write to + * \param [in] num_sectors The desired number of sectors to write + * \param [in] in_buff A source buffer to write sectors from. Must be + * large enough to hold num_sectors worth of sectors. + * + * \retval 0 num_sectors were written to file + * \retval >0 < num_sectors were written to file + */ +int mvhd_noop_write(MVHDMeta* vhdm, uint32_t offset, int num_sectors, void* in_buff); + +#endif \ No newline at end of file diff --git a/src/disk/minivhd/minivhd_manage.c b/src/disk/minivhd/minivhd_manage.c new file mode 100644 index 000000000..75e095332 --- /dev/null +++ b/src/disk/minivhd/minivhd_manage.c @@ -0,0 +1,533 @@ +/** + * \file + * \brief VHD management functions (open, close, read write etc) + */ + +#include +#include +#include +#include +#include +#include "cwalk.h" +#include "libxml2_encoding.h" +#include "minivhd_internal.h" +#include "minivhd_io.h" +#include "minivhd_util.h" +#include "minivhd_struct_rw.h" +#include "minivhd.h" + +int mvhd_errno = 0; +static char tmp_open_path[MVHD_MAX_PATH_BYTES] = {0}; +struct MVHDPaths { + char dir_path[MVHD_MAX_PATH_BYTES]; + char file_name[MVHD_MAX_PATH_BYTES]; + char w2ku_path[MVHD_MAX_PATH_BYTES]; + char w2ru_path[MVHD_MAX_PATH_BYTES]; + char joined_path[MVHD_MAX_PATH_BYTES]; + uint16_t tmp_src_path[MVHD_MAX_PATH_CHARS]; +}; + +static void mvhd_read_footer(MVHDMeta* vhdm); +static void mvhd_read_sparse_header(MVHDMeta* vhdm); +static bool mvhd_footer_checksum_valid(MVHDMeta* vhdm); +static bool mvhd_sparse_checksum_valid(MVHDMeta* vhdm); +static int mvhd_read_bat(MVHDMeta *vhdm, MVHDError* err); +static void mvhd_calc_sparse_values(MVHDMeta* vhdm); 
+static int mvhd_init_sector_bitmap(MVHDMeta* vhdm, MVHDError* err);
+
+/**
+ * \brief Populate data structures with content from a VHD footer
+ *
+ * Reads the last MVHD_FOOTER_SIZE bytes of the file and deserializes them
+ * into vhdm->footer. The function cannot report a read failure (it returns
+ * void), so the staging buffer is zero-filled: a failed or short read is
+ * parsed as zeros rather than as indeterminate stack contents.
+ *
+ * \param [in] vhdm MiniVHD data structure
+ */
+static void mvhd_read_footer(MVHDMeta* vhdm) {
+    uint8_t buffer[MVHD_FOOTER_SIZE] = {0};
+    mvhd_fseeko64(vhdm->f, -MVHD_FOOTER_SIZE, SEEK_END);
+    if (fread(buffer, sizeof buffer, 1, vhdm->f) != 1) {
+        /* Discard whatever partial data arrived so the result is deterministic */
+        memset(buffer, 0, sizeof buffer);
+    }
+    mvhd_buffer_to_footer(&vhdm->footer, buffer);
+}
+
+/**
+ * \brief Populate data structures with content from a VHD sparse header
+ *
+ * Reads MVHD_SPARSE_SIZE bytes starting at vhdm->footer.data_offset and
+ * deserializes them into vhdm->sparse. As with the footer, a failed or short
+ * read is parsed as zeros rather than as indeterminate stack contents.
+ *
+ * \param [in] vhdm MiniVHD data structure
+ */
+static void mvhd_read_sparse_header(MVHDMeta* vhdm) {
+    uint8_t buffer[MVHD_SPARSE_SIZE] = {0};
+    mvhd_fseeko64(vhdm->f, vhdm->footer.data_offset, SEEK_SET);
+    if (fread(buffer, sizeof buffer, 1, vhdm->f) != 1) {
+        /* Discard whatever partial data arrived so the result is deterministic */
+        memset(buffer, 0, sizeof buffer);
+    }
+    mvhd_buffer_to_header(&vhdm->sparse, buffer);
+}
+
+/**
+ * \brief Validate VHD footer checksum
+ *
+ * This works by generating a checksum from the footer, and comparing it against the stored checksum.
+ *
+ * \param [in] vhdm MiniVHD data structure
+ */
+static bool mvhd_footer_checksum_valid(MVHDMeta* vhdm) {
+    return vhdm->footer.checksum == mvhd_gen_footer_checksum(&vhdm->footer);
+}
+
+/**
+ * \brief Validate VHD sparse header checksum
+ *
+ * This works by generating a checksum from the sparse header, and comparing it against the stored checksum.
+ *
+ * \param [in] vhdm MiniVHD data structure
+ */
+static bool mvhd_sparse_checksum_valid(MVHDMeta* vhdm) {
+    return vhdm->sparse.checksum == mvhd_gen_sparse_checksum(&vhdm->sparse);
+}
+
+/**
+ * \brief Read BAT into MiniVHD data structure
+ *
+ * The Block Allocation Table (BAT) is the structure in a sparse and differencing VHD which stores
+ * the 4-byte sector offsets for each data block. This function allocates enough memory to contain
+ * the entire BAT, and then reads the contents of the BAT into the buffer.
+ *
+ * On failure vhdm->block_offset is left NULL; nothing is left for the caller to free.
+ *
+ * \param [in] vhdm MiniVHD data structure
+ * \param [out] err this is populated with MVHD_ERR_MEM if the calloc fails, or
+ *                  MVHD_ERR_FILE if the table cannot be read in full
+ *
+ * \retval -1 if an error occurs. Check value of err in this case
+ * \retval 0 if the function call succeeds
+ */
+static int mvhd_read_bat(MVHDMeta *vhdm, MVHDError* err) {
+    vhdm->block_offset = calloc(vhdm->sparse.max_bat_ent, sizeof *vhdm->block_offset);
+    if (vhdm->block_offset == NULL) {
+        *err = MVHD_ERR_MEM;
+        return -1;
+    }
+    mvhd_fseeko64(vhdm->f, vhdm->sparse.bat_offset, SEEK_SET);
+    for (uint32_t i = 0; i < vhdm->sparse.max_bat_ent; i++) {
+        if (fread(&vhdm->block_offset[i], sizeof *vhdm->block_offset, 1, vhdm->f) != 1) {
+            /* A truncated table must not be used as if it were complete. mvhd_open does
+               not free the table when -1 is returned, so it is released here. */
+            free(vhdm->block_offset);
+            vhdm->block_offset = NULL;
+            *err = MVHD_ERR_FILE;
+            return -1;
+        }
+        /* Entries are stored big-endian on disk */
+        vhdm->block_offset[i] = mvhd_from_be32(vhdm->block_offset[i]);
+    }
+    return 0;
+}
+
+/**
+ * \brief Perform a one-time calculation of some sparse VHD values
+ *
+ * Derives the number of sectors per data block from the sparse header's block
+ * size, and the number of whole sectors needed to hold one bit per sector of
+ * a block (rounded up).
+ *
+ * \param [in] vhdm MiniVHD data structure
+ */
+static void mvhd_calc_sparse_values(MVHDMeta* vhdm) {
+    vhdm->sect_per_block = vhdm->sparse.block_sz / MVHD_SECTOR_SIZE;
+    int bm_bytes = vhdm->sect_per_block / 8;
+    vhdm->bitmap.sector_count = bm_bytes / MVHD_SECTOR_SIZE;
+    /* A partially filled trailing sector still occupies a full sector */
+    if (bm_bytes % MVHD_SECTOR_SIZE > 0) {
+        vhdm->bitmap.sector_count++;
+    }
+}
+
+/**
+ * \brief Allocate memory for a sector bitmap.
+ *
+ * Each data block is preceded by a sector bitmap. Each bit indicates whether the corresponding sector
+ * is considered 'clean' or 'dirty' (for sparse VHD images), or whether to read from the parent or current
+ * image (for differencing images).
+ *
+ * \param [in] vhdm MiniVHD data structure
+ * \param [out] err this is populated with MVHD_ERR_MEM if the calloc fails
+ *
+ * \retval -1 if an error occurs.
Check value of err in this case + * \retval 0 if the function call succeeds + */ +static int mvhd_init_sector_bitmap(MVHDMeta* vhdm, MVHDError* err) { + vhdm->bitmap.curr_bitmap = calloc(vhdm->bitmap.sector_count, MVHD_SECTOR_SIZE); + if (vhdm->bitmap.curr_bitmap == NULL) { + *err = MVHD_ERR_MEM; + return -1; + } + vhdm->bitmap.curr_block = -1; + return 0; +} + +/** + * \brief Check if the path for a given platform code exists + * + * From the available paths, both relative and absolute, construct a full path + * and attempt to open a file at that path. + * + * Note, this function makes no attempt to verify that the path is the correct + * VHD image, or even a VHD image at all. + * + * \param [in] paths a struct containing all available paths to work with + * \param [in] the platform code to try and obtain a path for. Setting this to zero + * will try using the directory of the child image + * + * \retval true if a file is found + * \retval false if a file is not found + */ +static bool mvhd_parent_path_exists(struct MVHDPaths* paths, uint32_t plat_code) { + memset(paths->joined_path, 0, sizeof paths->joined_path); + FILE* f; + int cwk_ret, ferr; + enum cwk_path_style style = cwk_path_guess_style((const char*)paths->dir_path); + cwk_path_set_style(style); + cwk_ret = 1; + if (plat_code == MVHD_DIF_LOC_W2RU && *paths->w2ru_path) { + cwk_ret = cwk_path_join((const char*)paths->dir_path, (const char*)paths->w2ru_path, paths->joined_path, sizeof paths->joined_path); + } else if (plat_code == MVHD_DIF_LOC_W2KU && *paths->w2ku_path) { + memcpy(paths->joined_path, paths->w2ku_path, (sizeof paths->joined_path) - 1); + cwk_ret = 0; + } else if (plat_code == 0) { + cwk_ret = cwk_path_join((const char*)paths->dir_path, (const char*)paths->file_name, paths->joined_path, sizeof paths->joined_path); + } + if (cwk_ret > MVHD_MAX_PATH_BYTES) { + return false; + } + f = mvhd_fopen((const char*)paths->joined_path, "rb", &ferr); + if (f != NULL) { + /* We found a file at the 
requested path! */ + memcpy(tmp_open_path, paths->joined_path, (sizeof paths->joined_path) - 1); + tmp_open_path[sizeof tmp_open_path - 1] = '\0'; + fclose(f); + return true; + } else { + return false; + } +} + +/** + * \brief attempt to obtain a file path to a file that may be a valid VHD image + * + * Differential VHD images store both a UTF-16BE file name (or path), and up to + * eight "parent locator" entries. Using this information, this function tries to + * find a parent image. + * + * This function does not verify if the path returned is a valid parent image. + * + * \param [in] vhdm current MiniVHD data structure + * \param [out] err any errors that may occurr. Check this if NULL is returned + * + * \return a pointer to the global string `tmp_open_path`, or NULL if a path could + * not be found, or some error occurred + */ +static char* mvhd_get_diff_parent_path(MVHDMeta* vhdm, int* err) { + int utf_outlen, utf_inlen, utf_ret; + char* par_fp = NULL; + /* We can't resolve relative paths if we don't have an absolute + path to work with */ + if (!cwk_path_is_absolute((const char*)vhdm->filename)) { + *err = MVHD_ERR_PATH_REL; + goto end; + } + struct MVHDPaths* paths = calloc(1, sizeof *paths); + if (paths == NULL) { + *err = MVHD_ERR_MEM; + goto end; + } + size_t dirlen; + cwk_path_get_dirname((const char*)vhdm->filename, &dirlen); + if (dirlen >= sizeof paths->dir_path) { + *err = MVHD_ERR_PATH_LEN; + goto paths_cleanup; + } + memcpy(paths->dir_path, vhdm->filename, dirlen); + /* Get the filename field from the sparse header. 
*/ + utf_outlen = (int)sizeof paths->file_name; + utf_inlen = (int)sizeof vhdm->sparse.par_utf16_name; + utf_ret = UTF16BEToUTF8((unsigned char*)paths->file_name, &utf_outlen, (const unsigned char*)vhdm->sparse.par_utf16_name, &utf_inlen); + if (utf_ret < 0) { + mvhd_set_encoding_err(utf_ret, err); + goto paths_cleanup; + } + /* Now read the parent locator entries, both relative and absolute, if they exist */ + unsigned char* loc_path; + for (int i = 0; i < 8; i++) { + utf_outlen = MVHD_MAX_PATH_BYTES - 1; + if (vhdm->sparse.par_loc_entry[i].plat_code == MVHD_DIF_LOC_W2RU) { + loc_path = (unsigned char*)paths->w2ru_path; + } else if (vhdm->sparse.par_loc_entry[i].plat_code == MVHD_DIF_LOC_W2KU) { + loc_path = (unsigned char*)paths->w2ku_path; + } else { + continue; + } + utf_inlen = vhdm->sparse.par_loc_entry[i].plat_data_len; + if (utf_inlen > MVHD_MAX_PATH_BYTES) { + *err = MVHD_ERR_PATH_LEN; + goto paths_cleanup; + } + mvhd_fseeko64(vhdm->f, vhdm->sparse.par_loc_entry[i].plat_data_offset, SEEK_SET); + fread(paths->tmp_src_path, sizeof (uint8_t), utf_inlen, vhdm->f); + /* Note, the W2*u parent locators are UTF-16LE, unlike the filename field previously obtained, + which is UTF-16BE */ + utf_ret = UTF16LEToUTF8(loc_path, &utf_outlen, (const unsigned char*)paths->tmp_src_path, &utf_inlen); + if (utf_ret < 0) { + mvhd_set_encoding_err(utf_ret, err); + goto paths_cleanup; + } + } + /* We have paths in UTF-8. We should have enough info to try and find the parent VHD */ + /* Does the relative path exist? */ + if (mvhd_parent_path_exists(paths, MVHD_DIF_LOC_W2RU)) { + par_fp = tmp_open_path; + goto paths_cleanup; + } + /* What about trying the child directory? 
*/ + if (mvhd_parent_path_exists(paths, 0)) { + par_fp = tmp_open_path; + goto paths_cleanup; + } + /* Well, all else fails, try the stored absolute path, if it exists */ + if (mvhd_parent_path_exists(paths, MVHD_DIF_LOC_W2KU)) { + par_fp = tmp_open_path; + goto paths_cleanup; + } + /* If we reach this point, we could not find a path with a valid file */ + par_fp = NULL; + *err = MVHD_ERR_PAR_NOT_FOUND; + +paths_cleanup: + free(paths); + paths = NULL; +end: + return par_fp; +} + +/** + * \brief Attach the read/write function pointers to read/write functions + * + * Depending on the VHD type, different sector reading and writing functions are used. + * The functions are called via function pointers stored in the vhdm struct. + * + * \param [in] vhdm MiniVHD data structure + */ +static void mvhd_assign_io_funcs(MVHDMeta* vhdm) { + switch (vhdm->footer.disk_type) { + case MVHD_TYPE_FIXED: + vhdm->read_sectors = mvhd_fixed_read; + vhdm->write_sectors = mvhd_fixed_write; + break; + case MVHD_TYPE_DYNAMIC: + vhdm->read_sectors = mvhd_sparse_read; + vhdm->write_sectors = mvhd_sparse_diff_write; + break; + case MVHD_TYPE_DIFF: + vhdm->read_sectors = mvhd_diff_read; + vhdm->write_sectors = mvhd_sparse_diff_write; + break; + } + if (vhdm->readonly) { + vhdm->write_sectors = mvhd_noop_write; + } +} + +bool mvhd_file_is_vhd(FILE* f) { + if (f) { + uint8_t con_str[8]; + mvhd_fseeko64(f, -MVHD_FOOTER_SIZE, SEEK_END); + fread(con_str, sizeof con_str, 1, f); + return mvhd_is_conectix_str(con_str); + } else { + return false; + } +} + +MVHDGeom mvhd_calculate_geometry(uint64_t size) { + MVHDGeom chs; + uint32_t ts = (uint32_t)(size / MVHD_SECTOR_SIZE); + uint32_t spt, heads, cyl, cth; + if (ts > 65535 * 16 * 255) { + ts = 65535 * 16 * 255; + } + if (ts >= 65535 * 16 * 63) { + spt = 255; + heads = 16; + cth = ts / spt; + } else { + spt = 17; + cth = ts / spt; + heads = (cth + 1023) / 1024; + if (heads < 4) { + heads = 4; + } + if (cth >= (heads * 1024) || heads > 16) { + spt = 31; + 
heads = 16;
+            cth = ts / spt;
+        }
+        if (cth >= (heads * 1024)) {
+            spt = 63;
+            heads = 16;
+            cth = ts / spt;
+        }
+    }
+    cyl = cth / heads;
+    chs.heads = heads;
+    chs.spt = spt;
+    chs.cyl = cyl;
+    return chs;
+}
+
+/**
+ * Open a VHD image and build the MVHDMeta structure used by all other calls.
+ *
+ * For sparse and differencing images the sparse header and BAT are loaded and
+ * a sector bitmap buffer is allocated. For differencing images the parent is
+ * located and opened read-only, recursively.
+ *
+ * \param [in] path path of the image; must be shorter than the filename field
+ * \param [in] readonly open the file with "rb" instead of "rb+"
+ * \param [out] err set to an MVHD_ERR_* value on failure. It is also set to
+ *                  MVHD_ERR_TIMESTAMP, with a non-NULL return, when the parent's
+ *                  modification time does not match the stored one.
+ *
+ * \return the new structure, or NULL on failure
+ */
+MVHDMeta* mvhd_open(const char* path, bool readonly, int* err) {
+    MVHDError open_err;
+    MVHDMeta *vhdm = calloc(1, sizeof *vhdm);
+    if (vhdm == NULL) {
+        *err = MVHD_ERR_MEM;
+        goto end;
+    }
+    if (strlen(path) >= sizeof vhdm->filename) {
+        *err = MVHD_ERR_PATH_LEN;
+        goto cleanup_vhdm;
+    }
+    //This is safe, as we've just checked for potential overflow above
+    strcpy(vhdm->filename, path);
+    vhdm->f = mvhd_fopen((const char*)vhdm->filename, readonly ? "rb" : "rb+", err);
+    if (vhdm->f == NULL) {
+        /* note, mvhd_fopen sets err for us */
+        goto cleanup_vhdm;
+    }
+    vhdm->readonly = readonly;
+    if (!mvhd_file_is_vhd(vhdm->f)) {
+        *err = MVHD_ERR_NOT_VHD;
+        goto cleanup_file;
+    }
+    mvhd_read_footer(vhdm);
+    if (!mvhd_footer_checksum_valid(vhdm)) {
+        *err = MVHD_ERR_FOOTER_CHECKSUM;
+        goto cleanup_file;
+    }
+    if (vhdm->footer.disk_type == MVHD_TYPE_DIFF || vhdm->footer.disk_type == MVHD_TYPE_DYNAMIC) {
+        mvhd_read_sparse_header(vhdm);
+        if (!mvhd_sparse_checksum_valid(vhdm)) {
+            *err = MVHD_ERR_SPARSE_CHECKSUM;
+            goto cleanup_file;
+        }
+        if (mvhd_read_bat(vhdm, &open_err) == -1) {
+            *err = open_err;
+            goto cleanup_file;
+        }
+        mvhd_calc_sparse_values(vhdm);
+        if (mvhd_init_sector_bitmap(vhdm, &open_err) == -1) {
+            *err = open_err;
+            goto cleanup_bat;
+        }
+    } else if (vhdm->footer.disk_type != MVHD_TYPE_FIXED) {
+        /* Nothing beyond the file handle has been acquired on this path */
+        *err = MVHD_ERR_TYPE;
+        goto cleanup_file;
+    }
+    mvhd_assign_io_funcs(vhdm);
+    vhdm->format_buffer.zero_data = calloc(64, MVHD_SECTOR_SIZE);
+    if (vhdm->format_buffer.zero_data == NULL) {
+        *err = MVHD_ERR_MEM;
+        goto cleanup_bitmap;
+    }
+    vhdm->format_buffer.sector_count = 64;
+    if (vhdm->footer.disk_type == MVHD_TYPE_DIFF) {
+        char* par_path = mvhd_get_diff_parent_path(vhdm, err);
+        if (par_path == NULL) {
+            goto cleanup_format_buff;
+        }
+        /* Use a local status: whatever value the caller left in *err must not be
+           mistaken for a failure of the timestamp lookup. */
+        int par_err = 0;
+        uint32_t par_mod_ts = mvhd_file_mod_timestamp(par_path, &par_err);
+        if (par_err != 0) {
+            *err = par_err;
+            goto cleanup_format_buff;
+        }
+        bool ts_mismatch = (vhdm->sparse.par_timestamp != par_mod_ts);
+        par_err = 0;
+        vhdm->parent = mvhd_open(par_path, true, &par_err);
+        if (vhdm->parent == NULL) {
+            *err = par_err;
+            goto cleanup_format_buff;
+        }
+        if (memcmp(vhdm->sparse.par_uuid, vhdm->parent->footer.uuid, sizeof vhdm->sparse.par_uuid) != 0) {
+            *err = MVHD_ERR_INVALID_PAR_UUID;
+            /* The parent is already open at this point and must be released too */
+            goto cleanup_parent;
+        }
+        if (ts_mismatch) {
+            /* The last-modified timestamp is too fragile to make this a fatal error.
+               Instead, we inform the caller of the potential problem. */
+            *err = MVHD_ERR_TIMESTAMP;
+        } else if (par_err != 0) {
+            /* Pass on a non-fatal status raised while opening the parent chain */
+            *err = par_err;
+        }
+    }
+    /* If we've reached this point, we are good to go, so skip the cleanup steps */
+    goto end;
+cleanup_parent:
+    mvhd_close(vhdm->parent);
+    vhdm->parent = NULL;
+cleanup_format_buff:
+    free(vhdm->format_buffer.zero_data);
+    vhdm->format_buffer.zero_data = NULL;
+cleanup_bitmap:
+    free(vhdm->bitmap.curr_bitmap);
+    vhdm->bitmap.curr_bitmap = NULL;
+cleanup_bat:
+    free(vhdm->block_offset);
+    vhdm->block_offset = NULL;
+cleanup_file:
+    fclose(vhdm->f);
+    vhdm->f = NULL;
+cleanup_vhdm:
+    free(vhdm);
+    vhdm = NULL;
+end:
+    return vhdm;
+}
+
+/**
+ * Release everything acquired by mvhd_open, closing the parent chain first.
+ * Passing NULL is a no-op.
+ *
+ * \param [in] vhdm structure returned by mvhd_open
+ */
+void mvhd_close(MVHDMeta* vhdm) {
+    if (vhdm == NULL) {
+        return;
+    }
+    if (vhdm->parent != NULL) {
+        mvhd_close(vhdm->parent);
+    }
+    fclose(vhdm->f);
+    /* free(NULL) is a no-op, so the buffers need no individual guards */
+    free(vhdm->block_offset);
+    free(vhdm->bitmap.curr_bitmap);
+    free(vhdm->format_buffer.zero_data);
+    free(vhdm);
+}
+
+/**
+ * Store the parent's current modification timestamp in a differencing image.
+ *
+ * Updates vhdm->sparse (timestamp and checksum) and rewrites the sparse header
+ * at footer.data_offset.
+ *
+ * \param [in] vhdm an open differencing image
+ * \param [out] err set to an MVHD_ERR_* value when -1 is returned
+ *
+ * \retval 0 the header was updated
+ * \retval -1 on invalid arguments, wrong image type, missing parent, or a failed write
+ */
+int mvhd_diff_update_par_timestamp(MVHDMeta* vhdm, int* err) {
+    uint8_t sparse_buff[1024] = {0};
+    if (err == NULL) {
+        /* There is nowhere to report a status; in particular, do not write through err */
+        return -1;
+    }
+    if (vhdm == NULL) {
+        *err = MVHD_ERR_INVALID_PARAMS;
+        return -1;
+    }
+    if (vhdm->footer.disk_type != MVHD_TYPE_DIFF) {
+        *err = MVHD_ERR_TYPE;
+        return -1;
+    }
+    char* par_path = mvhd_get_diff_parent_path(vhdm, err);
+    if (par_path == NULL) {
+        return -1;
+    }
+    /* Local status so a stale value in *err is not read as a lookup failure */
+    int ts_err = 0;
+    uint32_t par_mod_ts = mvhd_file_mod_timestamp(par_path, &ts_err);
+    if (ts_err != 0) {
+        *err = ts_err;
+        return -1;
+    }
+    /* Update the timestamp and sparse header checksum */
+    vhdm->sparse.par_timestamp = par_mod_ts;
+    vhdm->sparse.checksum = mvhd_gen_sparse_checksum(&vhdm->sparse);
+    /* Generate and write the updated sparse header */
+    mvhd_header_to_buffer(&vhdm->sparse, sparse_buff);
+    mvhd_fseeko64(vhdm->f, (int64_t)vhdm->footer.data_offset, SEEK_SET);
+    if (fwrite(sparse_buff, sizeof sparse_buff, 1, vhdm->f) != 1) {
+        /* e.g. the image was opened read-only */
+        *err = MVHD_ERR_FILE;
+        return -1;
+    }
+    return 0;
+}
+
+/* Dispatch to the read function selected for this image type */
+int mvhd_read_sectors(MVHDMeta* vhdm, uint32_t offset, int num_sectors, void* out_buff) {
+    return vhdm->read_sectors(vhdm, offset, num_sectors, out_buff);
+}
+
+/* Dispatch to the write function selected for this image type */
+int mvhd_write_sectors(MVHDMeta* vhdm, uint32_t offset, int num_sectors, void* in_buff) {
+    return vhdm->write_sectors(vhdm, offset, num_sectors, in_buff);
+}
+
+/* Overwrite num_sectors sectors with zeros, in chunks of the preallocated zero buffer.
+   The results of the individual writes are not inspected; the function always returns 0. */
+int mvhd_format_sectors(MVHDMeta* vhdm, uint32_t offset, int num_sectors) {
+    int num_full = num_sectors / vhdm->format_buffer.sector_count;
+    int remain = num_sectors % vhdm->format_buffer.sector_count;
+    for (int i = 0; i < num_full; i++) {
+        vhdm->write_sectors(vhdm, offset, vhdm->format_buffer.sector_count, vhdm->format_buffer.zero_data);
+        offset += vhdm->format_buffer.sector_count;
+    }
+    vhdm->write_sectors(vhdm, offset, remain, vhdm->format_buffer.zero_data);
+    return 0;
+}
\ No newline at end of file
diff --git a/src/disk/minivhd/minivhd_struct_rw.c b/src/disk/minivhd/minivhd_struct_rw.c
new file mode 100644
index 000000000..66f2289af
--- /dev/null
+++ b/src/disk/minivhd/minivhd_struct_rw.c
@@ -0,0 +1,165 @@
+/**
+ * \file
+ * \brief Header and footer serialize/deserialize functions
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include "minivhd_util.h"
+#include "minivhd_internal.h"
+
+/* Read data from footer into the struct members, swapping endian where necessary
+   Note: order matters here! We must read each field in the order the struct is in.
+ Doing this may be less elegant than performing a memcpy to a packed struct, but + it avoids potential data alignment issues, and the endian swapping allows us to + use the fields directly. */ + +static void mvhd_next_buffer_to_struct(void* struct_memb, size_t memb_size, bool req_endian, uint8_t** buffer); +static void mvhd_next_struct_to_buffer(void* struct_memb, size_t memb_size, bool req_endian, uint8_t** buffer); + +/** + * \brief Get the next field from a buffer and store it in a struct member, converting endian if necessary + * + * \param [out] struct_memb struct member to save the field to + * \param [in] memb_size the size of struct_memb, in bytes + * \param [in] req_endian is the field a value that requires endian conversion (eg: uint16, uint32) + * \param [in] buffer the buffer from which fields are read from. Will be advanced at the end of the function call + */ +static void mvhd_next_buffer_to_struct(void* struct_memb, size_t memb_size, bool req_endian, uint8_t** buffer) { + memcpy(struct_memb, *buffer, memb_size); + if (req_endian) { + switch (memb_size) { + case 2: + *(uint16_t*)(struct_memb) = mvhd_from_be16(*(uint16_t*)(struct_memb)); + break; + case 4: + *(uint32_t*)(struct_memb) = mvhd_from_be32(*(uint32_t*)(struct_memb)); + break; + case 8: + *(uint64_t*)(struct_memb) = mvhd_from_be64(*(uint64_t*)(struct_memb)); + break; + } + } + *buffer += memb_size; +} + +/** + * \brief Save a struct member into a buffer, converting endian if necessary + * + * \param [in] struct_memb struct member read from + * \param [in] memb_size the size of struct_memb, in bytes + * \param [in] req_endian is the field a value that requires endian conversion (eg: uint16, uint32) + * \param [out] buffer the buffer from which struct member is saved to. 
Will be advanced at the end of the function call
+ */
+static void mvhd_next_struct_to_buffer(void* struct_memb, size_t memb_size, bool req_endian, uint8_t** buffer) {
+    uint8_t *buf_ptr = *buffer;
+    /* The converted value is staged in a properly typed local and then copied
+       byte-wise. The destination is a plain byte buffer at an arbitrary offset,
+       so storing through a casted uint16_t/uint32_t/uint64_t pointer could be
+       misaligned and violates the effective-type rules. */
+    uint16_t be16;
+    uint32_t be32;
+    uint64_t be64;
+    const void* src = struct_memb;
+    if (req_endian) {
+        switch (memb_size) {
+        case 2:
+            be16 = mvhd_to_be16(*(uint16_t*)(struct_memb));
+            src = &be16;
+            break;
+        case 4:
+            be32 = mvhd_to_be32(*(uint32_t*)(struct_memb));
+            src = &be32;
+            break;
+        case 8:
+            be64 = mvhd_to_be64(*(uint64_t*)(struct_memb));
+            src = &be64;
+            break;
+        default:
+            /* Any other size is copied unchanged, exactly as when req_endian is false */
+            break;
+        }
+    }
+    memcpy(buf_ptr, src, memb_size);
+    /* Advance the caller's cursor past the field just written */
+    *buffer = buf_ptr + memb_size;
+}
+
+void mvhd_buffer_to_footer(MVHDFooter* footer, uint8_t* buffer) {
+    uint8_t* buff_ptr = buffer;
+    mvhd_next_buffer_to_struct(&footer->cookie, sizeof footer->cookie, false, &buff_ptr);
+    mvhd_next_buffer_to_struct(&footer->features, sizeof footer->features, true, &buff_ptr);
+    mvhd_next_buffer_to_struct(&footer->fi_fmt_vers, sizeof footer->fi_fmt_vers, true, &buff_ptr);
+    mvhd_next_buffer_to_struct(&footer->data_offset, sizeof footer->data_offset, true, &buff_ptr);
+    mvhd_next_buffer_to_struct(&footer->timestamp, sizeof footer->timestamp, true, &buff_ptr);
+    mvhd_next_buffer_to_struct(&footer->cr_app, sizeof footer->cr_app, false, &buff_ptr);
+    mvhd_next_buffer_to_struct(&footer->cr_vers, sizeof footer->cr_vers, true, &buff_ptr);
+    mvhd_next_buffer_to_struct(&footer->cr_host_os, sizeof footer->cr_host_os, false, &buff_ptr);
+    mvhd_next_buffer_to_struct(&footer->orig_sz, sizeof footer->orig_sz, true, &buff_ptr);
+    mvhd_next_buffer_to_struct(&footer->curr_sz, sizeof footer->curr_sz, true, &buff_ptr);
+    mvhd_next_buffer_to_struct(&footer->geom.cyl, sizeof footer->geom.cyl, true, &buff_ptr);
+    mvhd_next_buffer_to_struct(&footer->geom.heads, sizeof footer->geom.heads, false, &buff_ptr);
+    mvhd_next_buffer_to_struct(&footer->geom.spt, sizeof footer->geom.spt, false, &buff_ptr);
+    mvhd_next_buffer_to_struct(&footer->disk_type, sizeof footer->disk_type, true, &buff_ptr);
+
mvhd_next_buffer_to_struct(&footer->checksum, sizeof footer->checksum, true, &buff_ptr); + mvhd_next_buffer_to_struct(&footer->uuid, sizeof footer->uuid, false, &buff_ptr); + mvhd_next_buffer_to_struct(&footer->saved_st, sizeof footer->saved_st, false, &buff_ptr); + mvhd_next_buffer_to_struct(&footer->reserved, sizeof footer->reserved, false, &buff_ptr); +} + +void mvhd_footer_to_buffer(MVHDFooter* footer, uint8_t* buffer) { + uint8_t* buff_ptr = buffer; + mvhd_next_struct_to_buffer(&footer->cookie, sizeof footer->cookie, false, &buff_ptr); + mvhd_next_struct_to_buffer(&footer->features, sizeof footer->features, true, &buff_ptr); + mvhd_next_struct_to_buffer(&footer->fi_fmt_vers, sizeof footer->fi_fmt_vers, true, &buff_ptr); + mvhd_next_struct_to_buffer(&footer->data_offset, sizeof footer->data_offset, true, &buff_ptr); + mvhd_next_struct_to_buffer(&footer->timestamp, sizeof footer->timestamp, true, &buff_ptr); + mvhd_next_struct_to_buffer(&footer->cr_app, sizeof footer->cr_app, false, &buff_ptr); + mvhd_next_struct_to_buffer(&footer->cr_vers, sizeof footer->cr_vers, true, &buff_ptr); + mvhd_next_struct_to_buffer(&footer->cr_host_os, sizeof footer->cr_host_os, false, &buff_ptr); + mvhd_next_struct_to_buffer(&footer->orig_sz, sizeof footer->orig_sz, true, &buff_ptr); + mvhd_next_struct_to_buffer(&footer->curr_sz, sizeof footer->curr_sz, true, &buff_ptr); + mvhd_next_struct_to_buffer(&footer->geom.cyl, sizeof footer->geom.cyl, true, &buff_ptr); + mvhd_next_struct_to_buffer(&footer->geom.heads, sizeof footer->geom.heads, false, &buff_ptr); + mvhd_next_struct_to_buffer(&footer->geom.spt, sizeof footer->geom.spt, false, &buff_ptr); + mvhd_next_struct_to_buffer(&footer->disk_type, sizeof footer->disk_type, true, &buff_ptr); + mvhd_next_struct_to_buffer(&footer->checksum, sizeof footer->checksum, true, &buff_ptr); + mvhd_next_struct_to_buffer(&footer->uuid, sizeof footer->uuid, false, &buff_ptr); + mvhd_next_struct_to_buffer(&footer->saved_st, sizeof footer->saved_st, 
false, &buff_ptr); + mvhd_next_struct_to_buffer(&footer->reserved, sizeof footer->reserved, false, &buff_ptr); +} + +void mvhd_buffer_to_header(MVHDSparseHeader* header, uint8_t* buffer) { + uint8_t* buff_ptr = buffer; + mvhd_next_buffer_to_struct(&header->cookie, sizeof header->cookie, false, &buff_ptr); + mvhd_next_buffer_to_struct(&header->data_offset, sizeof header->data_offset, true, &buff_ptr); + mvhd_next_buffer_to_struct(&header->bat_offset, sizeof header->bat_offset, true, &buff_ptr); + mvhd_next_buffer_to_struct(&header->head_vers, sizeof header->head_vers, true, &buff_ptr); + mvhd_next_buffer_to_struct(&header->max_bat_ent, sizeof header->max_bat_ent, true, &buff_ptr); + mvhd_next_buffer_to_struct(&header->block_sz, sizeof header->block_sz, true, &buff_ptr); + mvhd_next_buffer_to_struct(&header->checksum, sizeof header->checksum, true, &buff_ptr); + mvhd_next_buffer_to_struct(&header->par_uuid, sizeof header->par_uuid, false, &buff_ptr); + mvhd_next_buffer_to_struct(&header->par_timestamp, sizeof header->par_timestamp, true, &buff_ptr); + mvhd_next_buffer_to_struct(&header->reserved_1, sizeof header->reserved_1, true, &buff_ptr); + mvhd_next_buffer_to_struct(&header->par_utf16_name, sizeof header->par_utf16_name, false, &buff_ptr); + for (int i = 0; i < 8; i++) { + mvhd_next_buffer_to_struct(&header->par_loc_entry[i].plat_code, sizeof header->par_loc_entry[i].plat_code, true, &buff_ptr); + mvhd_next_buffer_to_struct(&header->par_loc_entry[i].plat_data_space, sizeof header->par_loc_entry[i].plat_data_space, true, &buff_ptr); + mvhd_next_buffer_to_struct(&header->par_loc_entry[i].plat_data_len, sizeof header->par_loc_entry[i].plat_data_len, true, &buff_ptr); + mvhd_next_buffer_to_struct(&header->par_loc_entry[i].reserved, sizeof header->par_loc_entry[i].reserved, true, &buff_ptr); + mvhd_next_buffer_to_struct(&header->par_loc_entry[i].plat_data_offset, sizeof header->par_loc_entry[i].plat_data_offset, true, &buff_ptr); + } + 
mvhd_next_buffer_to_struct(&header->reserved_2, sizeof header->reserved_2, false, &buff_ptr); +} + +void mvhd_header_to_buffer(MVHDSparseHeader* header, uint8_t* buffer) { + uint8_t* buff_ptr = buffer; + mvhd_next_struct_to_buffer(&header->cookie, sizeof header->cookie, false, &buff_ptr); + mvhd_next_struct_to_buffer(&header->data_offset, sizeof header->data_offset, true, &buff_ptr); + mvhd_next_struct_to_buffer(&header->bat_offset, sizeof header->bat_offset, true, &buff_ptr); + mvhd_next_struct_to_buffer(&header->head_vers, sizeof header->head_vers, true, &buff_ptr); + mvhd_next_struct_to_buffer(&header->max_bat_ent, sizeof header->max_bat_ent, true, &buff_ptr); + mvhd_next_struct_to_buffer(&header->block_sz, sizeof header->block_sz, true, &buff_ptr); + mvhd_next_struct_to_buffer(&header->checksum, sizeof header->checksum, true, &buff_ptr); + mvhd_next_struct_to_buffer(&header->par_uuid, sizeof header->par_uuid, false, &buff_ptr); + mvhd_next_struct_to_buffer(&header->par_timestamp, sizeof header->par_timestamp, true, &buff_ptr); + mvhd_next_struct_to_buffer(&header->reserved_1, sizeof header->reserved_1, true, &buff_ptr); + mvhd_next_struct_to_buffer(&header->par_utf16_name, sizeof header->par_utf16_name, false, &buff_ptr); + for (int i = 0; i < 8; i++) { + mvhd_next_struct_to_buffer(&header->par_loc_entry[i].plat_code, sizeof header->par_loc_entry[i].plat_code, true, &buff_ptr); + mvhd_next_struct_to_buffer(&header->par_loc_entry[i].plat_data_space, sizeof header->par_loc_entry[i].plat_data_space, true, &buff_ptr); + mvhd_next_struct_to_buffer(&header->par_loc_entry[i].plat_data_len, sizeof header->par_loc_entry[i].plat_data_len, true, &buff_ptr); + mvhd_next_struct_to_buffer(&header->par_loc_entry[i].reserved, sizeof header->par_loc_entry[i].reserved, true, &buff_ptr); + mvhd_next_struct_to_buffer(&header->par_loc_entry[i].plat_data_offset, sizeof header->par_loc_entry[i].plat_data_offset, true, &buff_ptr); + } + mvhd_next_struct_to_buffer(&header->reserved_2, 
sizeof header->reserved_2, false, &buff_ptr);
+}
\ No newline at end of file
diff --git a/src/disk/minivhd/minivhd_struct_rw.h b/src/disk/minivhd/minivhd_struct_rw.h
new file mode 100644
index 000000000..0b1e0181f
--- /dev/null
+++ b/src/disk/minivhd/minivhd_struct_rw.h
@@ -0,0 +1,38 @@
+#ifndef MINIVHD_STRUCT_RW_H
+#define MINIVHD_STRUCT_RW_H /* must be the macro tested by the #ifndef above, or the guard never takes effect */
+
+#include "minivhd_internal.h"
+
+/**
+ * \brief Save the contents of a VHD footer from a buffer to a struct
+ *
+ * \param [out] footer save contents of buffer into footer
+ * \param [in] buffer VHD footer in raw bytes
+ */
+void mvhd_buffer_to_footer(MVHDFooter* footer, uint8_t* buffer);
+
+/**
+ * \brief Save the contents of a VHD sparse header from a buffer to a struct
+ *
+ * \param [out] header save contents of buffer into header
+ * \param [in] buffer VHD header in raw bytes
+ */
+void mvhd_buffer_to_header(MVHDSparseHeader* header, uint8_t* buffer);
+
+/**
+ * \brief Save the contents of a VHD footer struct to a buffer
+ *
+ * \param [in] footer save contents of struct into buffer
+ * \param [out] buffer VHD footer in raw bytes
+ */
+void mvhd_footer_to_buffer(MVHDFooter* footer, uint8_t* buffer);
+
+/**
+ * \brief Save the contents of a VHD sparse header struct to a buffer
+ *
+ * \param [in] header save contents of struct into buffer
+ * \param [out] buffer VHD sparse header in raw bytes
+ */
+void mvhd_header_to_buffer(MVHDSparseHeader* header, uint8_t* buffer);
+
+#endif
\ No newline at end of file
diff --git a/src/disk/minivhd/minivhd_util.c b/src/disk/minivhd/minivhd_util.c
new file mode 100644
index 000000000..d8f44cad0
--- /dev/null
+++ b/src/disk/minivhd/minivhd_util.c
@@ -0,0 +1,323 @@
+/**
+ * \file
+ * \brief Utility functions
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include "libxml2_encoding.h"
+#include "minivhd_internal.h"
+#include "minivhd_util.h"
+
+const char MVHD_CONECTIX_COOKIE[] = "conectix";
+const char MVHD_CREATOR[] = "pcem";
+const char
MVHD_CREATOR_HOST_OS[] = "Wi2k"; +const char MVHD_CXSPARSE_COOKIE[] = "cxsparse"; + +uint16_t mvhd_from_be16(uint16_t val) { + uint8_t *tmp = (uint8_t*)&val; + uint16_t ret = 0; + ret |= (uint16_t)tmp[0] << 8; + ret |= (uint16_t)tmp[1] << 0; + return ret; +} +uint32_t mvhd_from_be32(uint32_t val) { + uint8_t *tmp = (uint8_t*)&val; + uint32_t ret = 0; + ret = (uint32_t)tmp[0] << 24; + ret |= (uint32_t)tmp[1] << 16; + ret |= (uint32_t)tmp[2] << 8; + ret |= (uint32_t)tmp[3] << 0; + return ret; +} +uint64_t mvhd_from_be64(uint64_t val) { + uint8_t *tmp = (uint8_t*)&val; + uint64_t ret = 0; + ret = (uint64_t)tmp[0] << 56; + ret |= (uint64_t)tmp[1] << 48; + ret |= (uint64_t)tmp[2] << 40; + ret |= (uint64_t)tmp[3] << 32; + ret |= (uint64_t)tmp[4] << 24; + ret |= (uint64_t)tmp[5] << 16; + ret |= (uint64_t)tmp[6] << 8; + ret |= (uint64_t)tmp[7] << 0; + return ret; +} +uint16_t mvhd_to_be16(uint16_t val) { + uint16_t ret = 0; + uint8_t *tmp = (uint8_t*)&ret; + tmp[0] = (val & 0xff00) >> 8; + tmp[1] = (val & 0x00ff) >> 0; + return ret; +} +uint32_t mvhd_to_be32(uint32_t val) { + uint32_t ret = 0; + uint8_t *tmp = (uint8_t*)&ret; + tmp[0] = (val & 0xff000000) >> 24; + tmp[1] = (val & 0x00ff0000) >> 16; + tmp[2] = (val & 0x0000ff00) >> 8; + tmp[3] = (val & 0x000000ff) >> 0; + return ret; +} +uint64_t mvhd_to_be64(uint64_t val) { + uint64_t ret = 0; + uint8_t *tmp = (uint8_t*)&ret; + tmp[0] = (uint8_t)((val & 0xff00000000000000) >> 56); + tmp[1] = (uint8_t)((val & 0x00ff000000000000) >> 48); + tmp[2] = (uint8_t)((val & 0x0000ff0000000000) >> 40); + tmp[3] = (uint8_t)((val & 0x000000ff00000000) >> 32); + tmp[4] = (uint8_t)((val & 0x00000000ff000000) >> 24); + tmp[5] = (uint8_t)((val & 0x0000000000ff0000) >> 16); + tmp[6] = (uint8_t)((val & 0x000000000000ff00) >> 8); + tmp[7] = (uint8_t)((val & 0x00000000000000ff) >> 0); + return ret; +} + +bool mvhd_is_conectix_str(const void* buffer) { + if (strncmp(buffer, MVHD_CONECTIX_COOKIE, strlen(MVHD_CONECTIX_COOKIE)) == 0) { + return 
true; + } else { + return false; + } +} + +void mvhd_generate_uuid(uint8_t* uuid) +{ + /* We aren't doing crypto here, so using system time as seed should be good enough */ + srand((unsigned int)time(0)); + for (int n = 0; n < 16; n++) { + uuid[n] = rand(); + } + uuid[6] &= 0x0F; + uuid[6] |= 0x40; /* Type 4 */ + uuid[8] &= 0x3F; + uuid[8] |= 0x80; /* Variant 1 */ +} + +uint32_t vhd_calc_timestamp(void) +{ + time_t start_time; + time_t curr_time; + double vhd_time; + start_time = MVHD_START_TS; /* 1 Jan 2000 00:00 */ + curr_time = time(NULL); + vhd_time = difftime(curr_time, start_time); + return (uint32_t)vhd_time; +} + +uint32_t mvhd_epoch_to_vhd_ts(time_t ts) { + time_t start_time = MVHD_START_TS; + if (ts < start_time) { + return start_time; + } + double vhd_time = difftime(ts, start_time); + return (uint32_t)vhd_time; +} + +time_t vhd_get_created_time(MVHDMeta *vhdm) +{ + time_t vhd_time = (time_t)vhdm->footer.timestamp; + time_t vhd_time_unix = MVHD_START_TS + vhd_time; + return vhd_time_unix; +} + +FILE* mvhd_fopen(const char* path, const char* mode, int* err) { + FILE* f = NULL; +#ifdef _WIN32 + size_t path_len = strlen(path); + size_t mode_len = strlen(mode); + mvhd_utf16 new_path[260] = {0}; + int new_path_len = (sizeof new_path) - 2; + mvhd_utf16 mode_str[5] = {0}; + int new_mode_len = (sizeof mode_str) - 2; + int path_res = UTF8ToUTF16LE((unsigned char*)new_path, &new_path_len, (const unsigned char*)path, (int*)&path_len); + int mode_res = UTF8ToUTF16LE((unsigned char*)mode_str, &new_mode_len, (const unsigned char*)mode, (int*)&mode_len); + if (path_res > 0 && mode_res > 0) { + f = _wfopen(new_path, mode_str); + if (f == NULL) { + mvhd_errno = errno; + *err = MVHD_ERR_FILE; + } + } else { + if (path_res == -1 || mode_res == -1) { + *err = MVHD_ERR_UTF_SIZE; + } else if (path_res == -2 || mode_res == -2) { + *err = MVHD_ERR_UTF_TRANSCODING_FAILED; + } + } +#else + f = fopen64(path, mode); + if (f == NULL) { + mvhd_errno = errno; + *err = MVHD_ERR_FILE; + 
} +#endif + return f; +} + +void mvhd_set_encoding_err(int encoding_retval, int* err) { + if (encoding_retval == -1) { + *err = MVHD_ERR_UTF_SIZE; + } else if (encoding_retval == -2) { + *err = MVHD_ERR_UTF_TRANSCODING_FAILED; + } +} + +uint64_t mvhd_calc_size_bytes(MVHDGeom *geom) { + uint64_t img_size = (uint64_t)geom->cyl * (uint64_t)geom->heads * (uint64_t)geom->spt * (uint64_t)MVHD_SECTOR_SIZE; + return img_size; +} + +uint32_t mvhd_calc_size_sectors(MVHDGeom *geom) { + uint32_t sector_size = (uint32_t)geom->cyl * (uint32_t)geom->heads * (uint32_t)geom->spt; + return sector_size; +} + +MVHDGeom mvhd_get_geometry(MVHDMeta* vhdm) { + MVHDGeom geometry = { .cyl = vhdm->footer.geom.cyl, .heads = vhdm->footer.geom.heads, .spt = vhdm->footer.geom.spt }; + return geometry; +} + +uint32_t mvhd_gen_footer_checksum(MVHDFooter* footer) { + uint32_t new_chk = 0; + uint32_t orig_chk = footer->checksum; + footer->checksum = 0; + uint8_t* footer_bytes = (uint8_t*)footer; + for (size_t i = 0; i < sizeof *footer; i++) { + new_chk += footer_bytes[i]; + } + footer->checksum = orig_chk; + return ~new_chk; +} + +uint32_t mvhd_gen_sparse_checksum(MVHDSparseHeader* header) { + uint32_t new_chk = 0; + uint32_t orig_chk = header->checksum; + header->checksum = 0; + uint8_t* sparse_bytes = (uint8_t*)header; + for (size_t i = 0; i < sizeof *header; i++) { + new_chk += sparse_bytes[i]; + } + header->checksum = orig_chk; + return ~new_chk; +} + +const char* mvhd_strerr(MVHDError err) { + switch (err) { + case MVHD_ERR_MEM: + return "memory allocation error"; + case MVHD_ERR_FILE: + return "file error"; + case MVHD_ERR_NOT_VHD: + return "file is not a VHD image"; + case MVHD_ERR_TYPE: + return "unsupported VHD image type"; + case MVHD_ERR_FOOTER_CHECKSUM: + return "invalid VHD footer checksum"; + case MVHD_ERR_SPARSE_CHECKSUM: + return "invalid VHD sparse header checksum"; + case MVHD_ERR_UTF_TRANSCODING_FAILED: + return "error converting path encoding"; + case MVHD_ERR_UTF_SIZE: + return 
"buffer size mismatch when converting path encoding"; + case MVHD_ERR_PATH_REL: + return "relative path detected where absolute path expected"; + case MVHD_ERR_PATH_LEN: + return "path length exceeds MVHD_MAX_PATH"; + case MVHD_ERR_PAR_NOT_FOUND: + return "parent VHD image not found"; + case MVHD_ERR_INVALID_PAR_UUID: + return "UUID mismatch between child and parent VHD"; + case MVHD_ERR_INVALID_GEOM: + return "invalid geometry detected"; + case MVHD_ERR_INVALID_SIZE: + return "invalid size"; + case MVHD_ERR_INVALID_BLOCK_SIZE: + return "invalid block size"; + case MVHD_ERR_INVALID_PARAMS: + return "invalid parameters passed to function"; + case MVHD_ERR_CONV_SIZE: + return "error converting image. Size mismatch detechted"; + default: + return "unknown error"; + } +} + +int64_t mvhd_ftello64(FILE* stream) +{ +#ifdef _MSC_VER + return _ftelli64(stream); +#else + return ftello64(stream); +#endif +} + +int mvhd_fseeko64(FILE* stream, int64_t offset, int origin) +{ +#ifdef _MSC_VER + return _fseeki64(stream, offset, origin); +#else + return fseeko64(stream, offset, origin); +#endif +} + +uint32_t mvhd_crc32_for_byte(uint32_t r) { + for (int j = 0; j < 8; ++j) + r = (r & 1 ? 
0 : (uint32_t)0xEDB88320L) ^ r >> 1; + return r ^ (uint32_t)0xFF000000L; +} + +uint32_t mvhd_crc32(const void* data, size_t n_bytes) { + static uint32_t table[0x100]; + if (!*table) + for (size_t i = 0; i < 0x100; ++i) + table[i] = mvhd_crc32_for_byte(i); + + uint32_t crc = 0; + for (size_t i = 0; i < n_bytes; ++i) + crc = table[(uint8_t)crc ^ ((uint8_t*)data)[i]] ^ crc >> 8; + + return crc; +} + +uint32_t mvhd_file_mod_timestamp(const char* path, int *err) { + *err = 0; +#ifdef _WIN32 + struct _stat file_stat; + size_t path_len = strlen(path); + mvhd_utf16 new_path[260] = {0}; + int new_path_len = (sizeof new_path) - 2; + int path_res = UTF8ToUTF16LE((unsigned char*)new_path, &new_path_len, (const unsigned char*)path, (int*)&path_len); + if (path_res > 0) { + int stat_res = _wstat(new_path, &file_stat); + if (stat_res != 0) { + mvhd_errno = errno; + *err = MVHD_ERR_FILE; + return 0; + } + return mvhd_epoch_to_vhd_ts(file_stat.st_mtime); + } else { + if (path_res == -1) { + *err = MVHD_ERR_UTF_SIZE; + } else if (path_res == -2) { + *err = MVHD_ERR_UTF_TRANSCODING_FAILED; + } + return 0; + } +#else + struct stat file_stat; + int stat_res = stat(path, &file_stat); + if (stat_res != 0) { + mvhd_errno = errno; + *err = MVHD_ERR_FILE; + return 0; + } + return mvhd_epoch_to_vhd_ts(file_stat.st_mtime); +#endif +} diff --git a/src/disk/minivhd/minivhd_util.h b/src/disk/minivhd/minivhd_util.h new file mode 100644 index 000000000..df6841009 --- /dev/null +++ b/src/disk/minivhd/minivhd_util.h @@ -0,0 +1,136 @@ +#ifndef MINIVHD_UTIL_H +#define MINIVHD_UTIL_H + +#include +#include +#include +#include "minivhd_internal.h" +#include "minivhd.h" +#define MVHD_START_TS 946684800 + +/** + * Functions to deal with endian issues + */ +uint16_t mvhd_from_be16(uint16_t val); +uint32_t mvhd_from_be32(uint32_t val); +uint64_t mvhd_from_be64(uint64_t val); +uint16_t mvhd_to_be16(uint16_t val); +uint32_t mvhd_to_be32(uint32_t val); +uint64_t mvhd_to_be64(uint64_t val); + +/** + * \brief 
Check if provided buffer begins with the string "conectix" + * + * \param [in] buffer The buffer to compare. Must be at least 8 bytes in length + * + * \return true if the buffer begins with "conectix" + * \return false if the buffer does not begin with "conectix" + */ +bool mvhd_is_conectix_str(const void* buffer); + +/** + * \brief Generate a raw 16 byte UUID + * + * \param [out] uuid A 16 byte buffer in which the generated UUID will be stored + */ +void mvhd_generate_uuid(uint8_t *uuid); + +/** + * \brief Calculate a VHD formatted timestamp from the current time + */ +uint32_t vhd_calc_timestamp(void); + +/** + * \brief Convert an epoch timestamp to a VHD timestamp + * + * \param [in] ts epoch timestamp to convert. + * + * \return The adjusted timestamp, or 0 if the input timestamp is + * earlier than 1 January 2000 + */ +uint32_t mvhd_epoch_to_vhd_ts(time_t ts); + +/** + * \brief Return the created time from a VHD image + * + * \param [in] vhdm Pointer to the MiniVHD metadata structure + * + * \return The created time, as a Unix timestamp + */ +time_t vhd_get_created_time(MVHDMeta *vhdm); + +/** + * \brief Cross platform, unicode filepath opening + * + * This function accounts for the fact that fopen() on Windows handles file paths differently compared to other + * operating systems. The Windows version of fopen() will not handle multi byte encoded text like UTF-8. + * + * Unicode filepath support on Windows requires using the _wfopen() function, which expects UTF-16LE + * encoded path and modestring. + * + * \param [in] path The filepath to open as a UTF-8 string + * \param [in] mode The mode string to use (eg: "rb+") + * \param [out] err The error value, if an error occurs + * + * \return a FILE pointer if successful, NULL otherwise. 
If NULL, check the value of err + */ +FILE* mvhd_fopen(const char* path, const char* mode, int* err); + +void mvhd_set_encoding_err(int encoding_retval, int* err); +uint64_t mvhd_calc_size_bytes(MVHDGeom *geom); +uint32_t mvhd_calc_size_sectors(MVHDGeom *geom); +MVHDGeom mvhd_get_geometry(MVHDMeta* vhdm); + +/** + * \brief Generate VHD footer checksum + * + * \param [in] footer VHD footer structure to checksum + */ +uint32_t mvhd_gen_footer_checksum(MVHDFooter* footer); + +/** + * \brief Generate VHD sparse header checksum + * + * \param [in] header VHD sparse header structure to checksum + */ +uint32_t mvhd_gen_sparse_checksum(MVHDSparseHeader* header); + +/** + * \brief Get current position in file stream + * + * This is a portable version of the POSIX ftello64(). + */ +int64_t mvhd_ftello64(FILE* stream); + +/** + * \brief Reposition the file stream's position + * + * This is a portable version of the POSIX fseeko64(). + */ +int mvhd_fseeko64(FILE* stream, int64_t offset, int origin); + +/** + * \brief Calculate the CRC32 of a data buffer. + * + * This function can be used for verifying data integrity. + * + * \param [in] data The data buffer + * \param [in] n_bytes The size of the data buffer in bytes + * + * \return The CRC32 of the data buffer + */ +uint32_t mvhd_crc32(const void* data, size_t n_bytes); + +/** + * \brief Calculate the file modification timestamp. + * + * This function is primarily to help protect differencing VHDs + * + * \param [in] path the UTF-8 file path + * \param [out] err The error value, if an error occurs + * + * \return The file modification time, as a VHD compatible timestamp. + * 'err' will be set to non-zero on error + */ +uint32_t mvhd_file_mod_timestamp(const char* path, int *err); +#endif diff --git a/src/win/Makefile.mingw b/src/win/Makefile.mingw index ffe6b66bf..f411e2412 100644 --- a/src/win/Makefile.mingw +++ b/src/win/Makefile.mingw @@ -303,7 +303,7 @@ endif # Nothing should need changing from here on.. 
# ######################################################################### VPATH := $(EXPATH) . $(CODEGEN) cpu \ - cdrom chipset device disk floppy \ + cdrom chipset device disk disk/minivhd floppy \ game machine mem printer \ sio sound \ sound/munt sound/munt/c_interface sound/munt/sha1 \ @@ -690,6 +690,10 @@ HDDOBJ := hdd.o \ hdc_xtide.o hdc_ide.o \ hdc_ide_opti611.o \ hdc_ide_cmd640.o hdc_ide_sff8038i.o + +MINIVHDOBJ := cwalk.o libxml2_encoding.o minivhd_convert.o \ + minivhd_create.o minivhd_io.o minivhd_manage.o \ + minivhd_struct_rw.o minivhd_util.o CDROMOBJ := cdrom.o \ cdrom_image_backend.o cdrom_image.o @@ -789,7 +793,7 @@ else endif OBJ := $(MAINOBJ) $(CPUOBJ) $(CHIPSETOBJ) $(MCHOBJ) $(DEVOBJ) $(MEMOBJ) \ - $(FDDOBJ) $(GAMEOBJ) $(CDROMOBJ) $(ZIPOBJ) $(MOOBJ) $(HDDOBJ) \ + $(FDDOBJ) $(GAMEOBJ) $(CDROMOBJ) $(ZIPOBJ) $(MOOBJ) $(HDDOBJ) $(MINIVHDOBJ) \ $(NETOBJ) $(PRINTOBJ) $(SCSIOBJ) $(SIOOBJ) $(SNDOBJ) $(VIDOBJ) \ $(PLATOBJ) $(UIOBJ) $(FSYNTHOBJ) $(MUNTOBJ) $(DEVBROBJ) \ $(DISCORDOBJ)