Comparing two large binary files in C
Clash Royale CLAN TAG#URR8PPP
up vote
4
down vote
favorite
I'm trying to read two sufficiently large binary files, comparing them and printing the offset at which they differ. I'm using fread
to read the binary files and memcmp
to compare them.
#include<stdio.h>
#include<time.h>
clock_t start, end;
int main(int argc, char *argv)
double cpu_time_taken;
FILE *fp1, *fp2;
printf("nArgument count: %d", argc);
printf("nFile 1 is: %s", argv[1]);
printf("nFile 2 is: %sn", argv[2]);
if (argc < 3)
printf("nInsufficient Arguments: n");
printf("nHelp:./executable <filename1> <filename2>n");
return 0;
else
fp1 = fopen(argv[1], "rb");
if (fp1 == NULL)
printf("nError in opening file %s", argv[1]);
return 0;
fp2 = fopen(argv[2], "rb");
if (fp2 == NULL)
printf("nError in opening file %s", argv[2]);
return 0;
if ((fp1 != NULL) && (fp2 != NULL))
start = clock();
compare_two_binary_files(fp1, fp2);
end = clock();
cpu_time_taken = ((double) (end - start)) / CLOCKS_PER_SEC;
printf("nTime taken to compare: %f", cpu_time_taken*1000);
int compare_two_binary_files(FILE *fp1, FILE *fp2)
!feof(fp2))
fread (tmp1, sizeof *tmp1, readsz, fp1);
fread (tmp2, sizeof *tmp2, readsz, fp2);
count += 16;
if(memcmp(tmp1, tmp2, readsz))
for(int i=0; i < readsz; i++)
printf ("%d: 0x%02x ",i, tmp1[i]);
printf("n%x", count);
return 0;
performance c file
New contributor
add a comment |Â
up vote
4
down vote
favorite
I'm trying to read two sufficiently large binary files, comparing them and printing the offset at which they differ. I'm using fread
to read the binary files and memcmp
to compare them.
#include<stdio.h>
#include<time.h>
clock_t start, end;
int main(int argc, char *argv)
double cpu_time_taken;
FILE *fp1, *fp2;
printf("nArgument count: %d", argc);
printf("nFile 1 is: %s", argv[1]);
printf("nFile 2 is: %sn", argv[2]);
if (argc < 3)
printf("nInsufficient Arguments: n");
printf("nHelp:./executable <filename1> <filename2>n");
return 0;
else
fp1 = fopen(argv[1], "rb");
if (fp1 == NULL)
printf("nError in opening file %s", argv[1]);
return 0;
fp2 = fopen(argv[2], "rb");
if (fp2 == NULL)
printf("nError in opening file %s", argv[2]);
return 0;
if ((fp1 != NULL) && (fp2 != NULL))
start = clock();
compare_two_binary_files(fp1, fp2);
end = clock();
cpu_time_taken = ((double) (end - start)) / CLOCKS_PER_SEC;
printf("nTime taken to compare: %f", cpu_time_taken*1000);
int compare_two_binary_files(FILE *fp1, FILE *fp2)
!feof(fp2))
fread (tmp1, sizeof *tmp1, readsz, fp1);
fread (tmp2, sizeof *tmp2, readsz, fp2);
count += 16;
if(memcmp(tmp1, tmp2, readsz))
for(int i=0; i < readsz; i++)
printf ("%d: 0x%02x ",i, tmp1[i]);
printf("n%x", count);
return 0;
performance c file
New contributor
add a comment |Â
up vote
4
down vote
favorite
up vote
4
down vote
favorite
I'm trying to read two sufficiently large binary files, comparing them and printing the offset at which they differ. I'm using fread
to read the binary files and memcmp
to compare them.
#include<stdio.h>
#include<time.h>
clock_t start, end;
int main(int argc, char *argv)
double cpu_time_taken;
FILE *fp1, *fp2;
printf("nArgument count: %d", argc);
printf("nFile 1 is: %s", argv[1]);
printf("nFile 2 is: %sn", argv[2]);
if (argc < 3)
printf("nInsufficient Arguments: n");
printf("nHelp:./executable <filename1> <filename2>n");
return 0;
else
fp1 = fopen(argv[1], "rb");
if (fp1 == NULL)
printf("nError in opening file %s", argv[1]);
return 0;
fp2 = fopen(argv[2], "rb");
if (fp2 == NULL)
printf("nError in opening file %s", argv[2]);
return 0;
if ((fp1 != NULL) && (fp2 != NULL))
start = clock();
compare_two_binary_files(fp1, fp2);
end = clock();
cpu_time_taken = ((double) (end - start)) / CLOCKS_PER_SEC;
printf("nTime taken to compare: %f", cpu_time_taken*1000);
int compare_two_binary_files(FILE *fp1, FILE *fp2)
!feof(fp2))
fread (tmp1, sizeof *tmp1, readsz, fp1);
fread (tmp2, sizeof *tmp2, readsz, fp2);
count += 16;
if(memcmp(tmp1, tmp2, readsz))
for(int i=0; i < readsz; i++)
printf ("%d: 0x%02x ",i, tmp1[i]);
printf("n%x", count);
return 0;
performance c file
New contributor
I'm trying to read two sufficiently large binary files, comparing them and printing the offset at which they differ. I'm using fread
to read the binary files and memcmp
to compare them.
#include<stdio.h>
#include<time.h>
clock_t start, end;
int main(int argc, char *argv)
double cpu_time_taken;
FILE *fp1, *fp2;
printf("nArgument count: %d", argc);
printf("nFile 1 is: %s", argv[1]);
printf("nFile 2 is: %sn", argv[2]);
if (argc < 3)
printf("nInsufficient Arguments: n");
printf("nHelp:./executable <filename1> <filename2>n");
return 0;
else
fp1 = fopen(argv[1], "rb");
if (fp1 == NULL)
printf("nError in opening file %s", argv[1]);
return 0;
fp2 = fopen(argv[2], "rb");
if (fp2 == NULL)
printf("nError in opening file %s", argv[2]);
return 0;
if ((fp1 != NULL) && (fp2 != NULL))
start = clock();
compare_two_binary_files(fp1, fp2);
end = clock();
cpu_time_taken = ((double) (end - start)) / CLOCKS_PER_SEC;
printf("nTime taken to compare: %f", cpu_time_taken*1000);
int compare_two_binary_files(FILE *fp1, FILE *fp2)
!feof(fp2))
fread (tmp1, sizeof *tmp1, readsz, fp1);
fread (tmp2, sizeof *tmp2, readsz, fp2);
count += 16;
if(memcmp(tmp1, tmp2, readsz))
for(int i=0; i < readsz; i++)
printf ("%d: 0x%02x ",i, tmp1[i]);
printf("n%x", count);
return 0;
performance c file
performance c file
New contributor
New contributor
edited 2 hours ago
200_success
125k14144402
125k14144402
New contributor
asked 2 hours ago
ampat
211
211
New contributor
New contributor
add a comment |Â
add a comment |Â
4 Answers
4
active
oldest
votes
up vote
2
down vote
fread (tmp1, sizeof *tmp1, readsz, fp1);
fread (tmp2, sizeof *tmp2, readsz, fp2);
count += 16;
if(memcmp(tmp1, tmp2, readsz))
â¦
You are discarding the return values of the fread()
calls, blindly assuming that they both successfully read 16 bytes.
It is unclear what the return value of compare_two_binary_files(…, …)
means. In fact, you sometimes don't return a value at all. Your compiler should have warned you about that problem.
File I/O should be done a block at a time: 512 bytes, 1024 bytes, or 2048 bytes would be a more reasonable chunk size than 16.
Technically, most of the time is spent waiting for I/O. cpu_time_taken
is a misnomer: what you're measuring is called "wall clock time".
add a comment |Â
up vote
1
down vote
if ((fp1 != NULL) && (fp2 != NULL))
test is redundant. If one of them happened to be NULL
, the program has already terminated. Don't print error messages to
stdout
. There is stderr
for that purpose. When printing to stdout, keep in mind that it is usually line buffered. The text stays in the internal buffer until a newline is printed. That's why it is important to print a newline after the message, not before.
printf("nError in opening file %s", argv[1]);
doesn't tell the most important part: why did fopen
fail. Print strerror(errno)
as well. Combining the above bullets, fprintf(stderr, "Error in opening file %s: %s\n", argv[1], strerror(errno));
You'd need to
#include <errno.h>
for that (and <string.h> for strerror).
"why it is important to print a newline after the message" is not necessarily sufficient. What are the rules of automatic flushing stdout buffer in C?. Consider fflush(stdout);
.
– chux
1 hour ago
I like the idea of informative error messages, yet printing a faulty file name is sometimes itself a problem. At a minimum, it is useful to include sentinels to demarcate the iffy file name. file %s
--> file "%s"
.
– chux
32 secs ago
add a comment |Â
up vote
1
down vote
I'm trying to read two sufficiently large binary files, comparing them and printing the offset at which they differ.
compare_two_binary_files()
is faulty for various reasons.
The offset at which they differ can readily exceed the
int
range. Recommend the widest integer available instead. File sizes are not limited by the integer types available, yet uintmax_t
is a very reasonable assumption of being sufficient for a file size. Checking
feof()
only ignores the possibility of a rare input error messing up the compare. compare_two_binary_files()
can return without returning a value. A compiler warning should have occurred. Enable all warnings or use a better compiler. Ignoring the return value of
fread()
is wrong. if(memcmp(tmp1, tmp2, readsz)){
is questionable. The if()
is true when the buffers differ.
Suggested alternative:
// Use something more generous than 16. Maybe even 4096 or 64k and allocate buffers
#define CMP_N 256
// Return value:
// 0: files compare equal in content and length, fp1 size saved as offset
// 1: files differ, fp1 longer, fp2 size saved as offset
// 2: files differ, fp2 longer, fp1 size saved as offset
// 3: files differ at offset
// -1: fp1 trouble reading. Unspecified data in offset
// -2: fp2 trouble reading. Unspecified data in offset
int compare_two_binary_files_alternate(FILE *fp1, FILE *fp2, uintmax_t *offset)
char tmp1[CMP_N], tmp2[CMP_N];
size_t n1, n2;
rewind(fp1); // start at beginning and clear error
rewind(fp2);
*offset = 0;
do
n1 = fread(tmp1, sizeof *tmp1, sizeof tmp1 / sizeof *tmp1, fp1);
if (n1 == 0 && ferror(fp1))
return -1;
n2 = fread(tmp2, sizeof *tmp2, sizeof tmp2 / sizeof *tmp2, fp2);
if (n2 == 0 && ferror(fp2))
return -2;
size_t n_min = n1 < n2 ? n1 : n2;
if (memcmp(tmp1, tmp2, n_min)) // Quickly find if file contents differ ...
for (size_t i = 0; i < n_min; i++) // Slowly find where they differ
if (tmp1[i] != tmp2[i])
*offset += i;
return 3;
*offset += n_min;
if (n1 > n_min)
return 1;
if (n2 > n_min)
return 2;
while (n1);
return 0;
add a comment |Â
up vote
0
down vote
In addition to what 200_success said:
"rb"
is not a valid fopen flag. It most likely has the same effect as "r"
the return value of
memcmp()
is zero if the two are equal, so I am not sure what the print loop is doing, especially because it exits if the first 16 bytes are the same. In general, try not to print in calculation functions; instead, use the return value to indicate a result.
16 is an arbitrary and very small block size to read. Consider making it larger and storing it as a constant, rather than retyping it each time.
2
Disagree with ""rb"
is not a valid fopen flag." The C spec has "rb open binary file for reading".
– chux
1 hour ago
add a comment |Â
4 Answers
4
active
oldest
votes
4 Answers
4
active
oldest
votes
active
oldest
votes
active
oldest
votes
up vote
2
down vote
fread (tmp1, sizeof *tmp1, readsz, fp1);
fread (tmp2, sizeof *tmp2, readsz, fp2);
count += 16;
if(memcmp(tmp1, tmp2, readsz))
â¦
You are discarding the return values of the fread()
calls, blindly assuming that they both successfully read 16 bytes.
It is unclear what the return value of compare_two_binary_files(â¦, â¦)
means. In fact, you sometimes don't return a value at all. Your compiler should have warned you about that problem.
File I/O should be done a block at a time: 512 bytes, 1024 bytes, or 2048 bytes would be a more reasonable chunk size than 16.
Technically, most of the time is spent waiting for I/O. cpu_time_taken
is a misnomer: what you're measuring is called "wall clock time".
add a comment |Â
up vote
2
down vote
fread (tmp1, sizeof *tmp1, readsz, fp1);
fread (tmp2, sizeof *tmp2, readsz, fp2);
count += 16;
if(memcmp(tmp1, tmp2, readsz))
â¦
You are discarding the return values of the fread()
calls, blindly assuming that they both successfully read 16 bytes.
It is unclear what the return value of compare_two_binary_files(â¦, â¦)
means. In fact, you sometimes don't return a value at all. Your compiler should have warned you about that problem.
File I/O should be done a block at a time: 512 bytes, 1024 bytes, or 2048 bytes would be a more reasonable chunk size than 16.
Technically, most of the time is spent waiting for I/O. cpu_time_taken
is a misnomer: what you're measuring is called "wall clock time".
add a comment |Â
up vote
2
down vote
up vote
2
down vote
fread (tmp1, sizeof *tmp1, readsz, fp1);
fread (tmp2, sizeof *tmp2, readsz, fp2);
count += 16;
if(memcmp(tmp1, tmp2, readsz))
â¦
You are discarding the return values of the fread()
calls, blindly assuming that they both successfully read 16 bytes.
It is unclear what the return value of compare_two_binary_files(â¦, â¦)
means. In fact, you sometimes don't return a value at all. Your compiler should have warned you about that problem.
File I/O should be done a block at a time: 512 bytes, 1024 bytes, or 2048 bytes would be a more reasonable chunk size than 16.
Technically, most of the time is spent waiting for I/O. cpu_time_taken
is a misnomer: what you're measuring is called "wall clock time".
fread (tmp1, sizeof *tmp1, readsz, fp1);
fread (tmp2, sizeof *tmp2, readsz, fp2);
count += 16;
if(memcmp(tmp1, tmp2, readsz))
â¦
You are discarding the return values of the fread()
calls, blindly assuming that they both successfully read 16 bytes.
It is unclear what the return value of compare_two_binary_files(…, …)
means. In fact, you sometimes don't return a value at all. Your compiler should have warned you about that problem.
File I/O should be done a block at a time: 512 bytes, 1024 bytes, or 2048 bytes would be a more reasonable chunk size than 16.
Technically, most of the time is spent waiting for I/O. cpu_time_taken
is a misnomer: what you're measuring is called "wall clock time".
answered 1 hour ago
200_success
125k14144402
125k14144402
add a comment |Â
add a comment |Â
up vote
1
down vote
if ((fp1 != NULL) && (fp2 != NULL))
test is redundant. If one of them happened to beNULL
, the program is already terminated.Don't print error messages to the
stdout
. There isstderr
for that purpose.When printing to stdout, keep in mind that it is usually line buffered. The text stays in the internal buffer until a newline is printed. That's why it is important to print a newline after the message, not before.
printf("nError in opening file %s", argv[1]);
doesn't tell the most important part: why didfopen
fail. Printstrerror(errno)
as well. Combining the above bullets,fprintf(stderr, "Error in opening file %s: %sn", argv[1], strerror(errno));
You'd need to
#include <errno.h>
for that.
"why it is important to print a newline after the message" is not necessarily sufficient. What are the rules of automatic flushing stdout buffer in C?. Considerfflush(stdout);
.
â chux
1 hour ago
I like the idea of of informative error messages, yet printing a faulty file name is sometimes itself a problem. At a minimum, it is useful to include sententials to demarcate the iffy file name.file %s
-->file "%s"
.
â chux
32 secs ago
add a comment |Â
up vote
1
down vote
if ((fp1 != NULL) && (fp2 != NULL))
test is redundant. If one of them happened to beNULL
, the program is already terminated.Don't print error messages to the
stdout
. There isstderr
for that purpose.When printing to stdout, keep in mind that it is usually line buffered. The text stays in the internal buffer until a newline is printed. That's why it is important to print a newline after the message, not before.
printf("nError in opening file %s", argv[1]);
doesn't tell the most important part: why didfopen
fail. Printstrerror(errno)
as well. Combining the above bullets,fprintf(stderr, "Error in opening file %s: %sn", argv[1], strerror(errno));
You'd need to
#include <errno.h>
for that.
"why it is important to print a newline after the message" is not necessarily sufficient. What are the rules of automatic flushing stdout buffer in C?. Considerfflush(stdout);
.
â chux
1 hour ago
I like the idea of of informative error messages, yet printing a faulty file name is sometimes itself a problem. At a minimum, it is useful to include sententials to demarcate the iffy file name.file %s
-->file "%s"
.
â chux
32 secs ago
add a comment |Â
up vote
1
down vote
up vote
1
down vote
if ((fp1 != NULL) && (fp2 != NULL))
test is redundant. If one of them happened to beNULL
, the program is already terminated.Don't print error messages to the
stdout
. There isstderr
for that purpose.When printing to stdout, keep in mind that it is usually line buffered. The text stays in the internal buffer until a newline is printed. That's why it is important to print a newline after the message, not before.
printf("nError in opening file %s", argv[1]);
doesn't tell the most important part: why didfopen
fail. Printstrerror(errno)
as well. Combining the above bullets,fprintf(stderr, "Error in opening file %s: %sn", argv[1], strerror(errno));
You'd need to
#include <errno.h>
for that.
if ((fp1 != NULL) && (fp2 != NULL))
test is redundant. If one of them happened to be NULL
, the program has already terminated. Don't print error messages to
stdout
. There is stderr
for that purpose. When printing to stdout, keep in mind that it is usually line buffered. The text stays in the internal buffer until a newline is printed. That's why it is important to print a newline after the message, not before.
printf("nError in opening file %s", argv[1]);
doesn't tell the most important part: why did fopen
fail. Print strerror(errno)
as well. Combining the above bullets, fprintf(stderr, "Error in opening file %s: %s\n", argv[1], strerror(errno));
You'd need to
#include <errno.h>
for that.
answered 1 hour ago
vnp
37k12992
37k12992
"why it is important to print a newline after the message" is not necessarily sufficient. What are the rules of automatic flushing stdout buffer in C?. Considerfflush(stdout);
.
â chux
1 hour ago
I like the idea of of informative error messages, yet printing a faulty file name is sometimes itself a problem. At a minimum, it is useful to include sententials to demarcate the iffy file name.file %s
-->file "%s"
.
â chux
32 secs ago
add a comment |Â
"why it is important to print a newline after the message" is not necessarily sufficient. What are the rules of automatic flushing stdout buffer in C?. Considerfflush(stdout);
.
â chux
1 hour ago
I like the idea of of informative error messages, yet printing a faulty file name is sometimes itself a problem. At a minimum, it is useful to include sententials to demarcate the iffy file name.file %s
-->file "%s"
.
â chux
32 secs ago
"why it is important to print a newline after the message" is not necessarily sufficient. What are the rules of automatic flushing stdout buffer in C?. Consider
fflush(stdout);
.â chux
1 hour ago
"why it is important to print a newline after the message" is not necessarily sufficient. What are the rules of automatic flushing stdout buffer in C?. Consider
fflush(stdout);
.â chux
1 hour ago
I like the idea of informative error messages, yet printing a faulty file name is sometimes itself a problem. At a minimum, it is useful to include sentinels to demarcate the iffy file name.
file %s
--> file "%s"
. – chux
32 secs ago
I like the idea of of informative error messages, yet printing a faulty file name is sometimes itself a problem. At a minimum, it is useful to include sententials to demarcate the iffy file name.
file %s
--> file "%s"
.â chux
32 secs ago
add a comment |Â
up vote
1
down vote
I'm trying to read two sufficiently large binary files, comparing them and printing the offset at which they differ.
compare_two_binary_files()
is faulty for various reasons.
The offset in which they differ can readily exceed the
int
range. Recommend the widest integer available instead. File sizes are not limited by the integer types available, yetuintmax_t
is a very reasonable assumption of being sufficient for a file size.Checking
feof()
only ignores the possibility of a rare input error messing up the compare.compare_two_binary_files()
can return without a returning a value. A compiler warning should have occurred. Enable all warnings or use a better compiler.Ignoring the return value of
fread()
is wrong.if(memcmp(tmp1, tmp2, readsz)){
is questionable. Theif()
is true when the buffer differ.
Suggested alternative:
// Use something more generous than 16. Maybe even 4096 or 64k and allocate buffers
#define CMP_N 256
// Return value:
// 0: files compare equal in content and length, fp1 size saved as offset
// 1: files differ, fp1 longer, fp2 size saved as offset
// 2: files differ, fp2 longer, fp1 size saved as offset
// 3: files differ at offset
// -1: fp1 trouble reading. Unspecified data in offset
// -2: fp2 trouble reading. Unspecified data in offset
int compare_two_binary_files_alternate(FILE *fp1, FILE *fp2, uintmax_t *offset)
char tmp1[CMP_N], tmp2[CMP_N];
size_t n1, n2;
rewind(fp1); // start at beginning and clear error
rewind(fp2);
*offset = 0;
do
n1 = fread(tmp1, sizeof *tmp1, sizeof tmp1 / sizeof *tmp1, fp1);
if (n1 == 0 && ferror(fp1))
return -1;
n2 = fread(tmp2, sizeof *tmp2, sizeof tmp2 / sizeof *tmp2, fp2);
if (n2 == 0 && ferror(fp2))
return -2;
size_t n_min = n1 < n2 ? n1 : n2;
if (memcmp(tmp1, tmp2, n_min)) // Quickly find if file contents differ ...
for (size_t i = 0; i < n_min; i++) // Slowly find where they differ
if (tmp1[i] != tmp2[i])
*offset += i;
return 3;
*offset += n_min;
if (n1 > n_min)
return 1;
if (n2 > n_min)
return 2;
while (n1);
return 0;
add a comment |Â
up vote
1
down vote
I'm trying to read two sufficiently large binary files, comparing them and printing the offset at which they differ.
compare_two_binary_files()
is faulty for various reasons.
The offset in which they differ can readily exceed the
int
range. Recommend the widest integer available instead. File sizes are not limited by the integer types available, yetuintmax_t
is a very reasonable assumption of being sufficient for a file size.Checking
feof()
only ignores the possibility of a rare input error messing up the compare.compare_two_binary_files()
can return without a returning a value. A compiler warning should have occurred. Enable all warnings or use a better compiler.Ignoring the return value of
fread()
is wrong.if(memcmp(tmp1, tmp2, readsz)){
is questionable. Theif()
is true when the buffer differ.
Suggested alternative:
// Use something more generous than 16. Maybe even 4096 or 64k and allocate buffers
#define CMP_N 256
// Return value:
// 0: files compare equal in content and length, fp1 size saved as offset
// 1: files differ, fp1 longer, fp2 size saved as offset
// 2: files differ, fp2 longer, fp1 size saved as offset
// 3: files differ at offset
// -1: fp1 trouble reading. Unspecified data in offset
// -2: fp2 trouble reading. Unspecified data in offset
int compare_two_binary_files_alternate(FILE *fp1, FILE *fp2, uintmax_t *offset)
char tmp1[CMP_N], tmp2[CMP_N];
size_t n1, n2;
rewind(fp1); // start at beginning and clear error
rewind(fp2);
*offset = 0;
do
n1 = fread(tmp1, sizeof *tmp1, sizeof tmp1 / sizeof *tmp1, fp1);
if (n1 == 0 && ferror(fp1))
return -1;
n2 = fread(tmp2, sizeof *tmp2, sizeof tmp2 / sizeof *tmp2, fp2);
if (n2 == 0 && ferror(fp2))
return -2;
size_t n_min = n1 < n2 ? n1 : n2;
if (memcmp(tmp1, tmp2, n_min)) // Quickly find if file contents differ ...
for (size_t i = 0; i < n_min; i++) // Slowly find where they differ
if (tmp1[i] != tmp2[i])
*offset += i;
return 3;
*offset += n_min;
if (n1 > n_min)
return 1;
if (n2 > n_min)
return 2;
while (n1);
return 0;
add a comment |Â
up vote
1
down vote
up vote
1
down vote
I'm trying to read two sufficiently large binary files, comparing them and printing the offset at which they differ.
compare_two_binary_files()
is faulty for various reasons.
The offset in which they differ can readily exceed the
int
range. Recommend the widest integer available instead. File sizes are not limited by the integer types available, yetuintmax_t
is a very reasonable assumption of being sufficient for a file size.Checking
feof()
only ignores the possibility of a rare input error messing up the compare.compare_two_binary_files()
can return without a returning a value. A compiler warning should have occurred. Enable all warnings or use a better compiler.Ignoring the return value of
fread()
is wrong.if(memcmp(tmp1, tmp2, readsz)){
is questionable. Theif()
is true when the buffer differ.
Suggested alternative:
// Use something more generous than 16. Maybe even 4096 or 64k and allocate buffers
#define CMP_N 256
// Return value:
// 0: files compare equal in content and length, fp1 size saved as offset
// 1: files differ, fp1 longer, fp2 size saved as offset
// 2: files differ, fp2 longer, fp1 size saved as offset
// 3: files differ at offset
// -1: fp1 trouble reading. Unspecified data in offset
// -2: fp2 trouble reading. Unspecified data in offset
int compare_two_binary_files_alternate(FILE *fp1, FILE *fp2, uintmax_t *offset)
char tmp1[CMP_N], tmp2[CMP_N];
size_t n1, n2;
rewind(fp1); // start at beginning and clear error
rewind(fp2);
*offset = 0;
do
n1 = fread(tmp1, sizeof *tmp1, sizeof tmp1 / sizeof *tmp1, fp1);
if (n1 == 0 && ferror(fp1))
return -1;
n2 = fread(tmp2, sizeof *tmp2, sizeof tmp2 / sizeof *tmp2, fp2);
if (n2 == 0 && ferror(fp2))
return -2;
size_t n_min = n1 < n2 ? n1 : n2;
if (memcmp(tmp1, tmp2, n_min)) // Quickly find if file contents differ ...
for (size_t i = 0; i < n_min; i++) // Slowly find where they differ
if (tmp1[i] != tmp2[i])
*offset += i;
return 3;
*offset += n_min;
if (n1 > n_min)
return 1;
if (n2 > n_min)
return 2;
while (n1);
return 0;
I'm trying to read two sufficiently large binary files, comparing them and printing the offset at which they differ.
compare_two_binary_files()
is faulty for various reasons.
The offset at which they differ can readily exceed the
int
range. Recommend the widest integer available instead. File sizes are not limited by the integer types available, yet uintmax_t
is a very reasonable assumption of being sufficient for a file size. Checking
feof()
only ignores the possibility of a rare input error messing up the compare. compare_two_binary_files()
can return without returning a value. A compiler warning should have occurred. Enable all warnings or use a better compiler. Ignoring the return value of
fread()
is wrong. if(memcmp(tmp1, tmp2, readsz)){
is questionable. The if()
is true when the buffers differ.
Suggested alternative:
// Use something more generous than 16. Maybe even 4096 or 64k and allocate buffers
#define CMP_N 256
// Return value:
// 0: files compare equal in content and length, fp1 size saved as offset
// 1: files differ, fp1 longer, fp2 size saved as offset
// 2: files differ, fp2 longer, fp1 size saved as offset
// 3: files differ at offset
// -1: fp1 trouble reading. Unspecified data in offset
// -2: fp2 trouble reading. Unspecified data in offset
int compare_two_binary_files_alternate(FILE *fp1, FILE *fp2, uintmax_t *offset)
char tmp1[CMP_N], tmp2[CMP_N];
size_t n1, n2;
rewind(fp1); // start at beginning and clear error
rewind(fp2);
*offset = 0;
do
n1 = fread(tmp1, sizeof *tmp1, sizeof tmp1 / sizeof *tmp1, fp1);
if (n1 == 0 && ferror(fp1))
return -1;
n2 = fread(tmp2, sizeof *tmp2, sizeof tmp2 / sizeof *tmp2, fp2);
if (n2 == 0 && ferror(fp2))
return -2;
size_t n_min = n1 < n2 ? n1 : n2;
if (memcmp(tmp1, tmp2, n_min)) // Quickly find if file contents differ ...
for (size_t i = 0; i < n_min; i++) // Slowly find where they differ
if (tmp1[i] != tmp2[i])
*offset += i;
return 3;
*offset += n_min;
if (n1 > n_min)
return 1;
if (n2 > n_min)
return 2;
while (n1);
return 0;
edited 12 mins ago
answered 27 mins ago
chux
11.7k11339
11.7k11339
add a comment |Â
add a comment |Â
up vote
0
down vote
In addition to what 200_success said:
"rb"
is not a valid fopen flag. It most likely has the same effect as"r"
the return value of
memcmp()
is zero if the two are equal, so i am not sure what the print loop is doing, especially because it exits if the first 16 bytes are the same.in general try not to print in calculation functions, instead use the return value to indicate a result.
16 is an arbitrary and very small block size to read. consider making it larger and storing it as a constant, rather than retyping it each time.
2
Disagree with ""rb"
is not a valid fopen flag." The C spec has "rb open binary file for reading".
â chux
1 hour ago
add a comment |Â
up vote
0
down vote
In addition to what 200_success said:
"rb"
is not a valid fopen flag. It most likely has the same effect as"r"
the return value of
memcmp()
is zero if the two are equal, so i am not sure what the print loop is doing, especially because it exits if the first 16 bytes are the same.in general try not to print in calculation functions, instead use the return value to indicate a result.
16 is an arbitrary and very small block size to read. consider making it larger and storing it as a constant, rather than retyping it each time.
2
Disagree with ""rb"
is not a valid fopen flag." The C spec has "rb open binary file for reading".
â chux
1 hour ago
add a comment |Â
up vote
0
down vote
up vote
0
down vote
In addition to what 200_success said:
"rb"
is not a valid fopen flag. It most likely has the same effect as"r"
the return value of
memcmp()
is zero if the two are equal, so i am not sure what the print loop is doing, especially because it exits if the first 16 bytes are the same.in general try not to print in calculation functions, instead use the return value to indicate a result.
16 is an arbitrary and very small block size to read. consider making it larger and storing it as a constant, rather than retyping it each time.
In addition to what 200_success said:
"rb"
is not a valid fopen flag. It most likely has the same effect as "r"
the return value of
memcmp()
is zero if the two are equal, so I am not sure what the print loop is doing, especially because it exits if the first 16 bytes are the same. In general, try not to print in calculation functions; instead, use the return value to indicate a result.
16 is an arbitrary and very small block size to read. Consider making it larger and storing it as a constant, rather than retyping it each time.
answered 1 hour ago
Peter
65812
65812
2
Disagree with ""rb"
is not a valid fopen flag." The C spec has "rb open binary file for reading".
â chux
1 hour ago
add a comment |Â
2
Disagree with ""rb"
is not a valid fopen flag." The C spec has "rb open binary file for reading".
â chux
1 hour ago
2
2
Disagree with "
"rb"
is not a valid fopen flag." The C spec has "rb open binary file for reading". – chux
1 hour ago
Disagree with "
"rb"
is not a valid fopen flag." The C spec has "rb open binary file for reading".â chux
1 hour ago
add a comment |Â
ampat is a new contributor. Be nice, and check out our Code of Conduct.
ampat is a new contributor. Be nice, and check out our Code of Conduct.
ampat is a new contributor. Be nice, and check out our Code of Conduct.
ampat is a new contributor. Be nice, and check out our Code of Conduct.
Sign up or log in
StackExchange.ready(function ()
StackExchange.helpers.onClickDraftSave('#login-link');
);
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
StackExchange.ready(
function ()
StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2fcodereview.stackexchange.com%2fquestions%2f204478%2fcomparing-two-large-binary-files-in-c%23new-answer', 'question_page');
);
Post as a guest
Sign up or log in
StackExchange.ready(function ()
StackExchange.helpers.onClickDraftSave('#login-link');
);
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Sign up or log in
StackExchange.ready(function ()
StackExchange.helpers.onClickDraftSave('#login-link');
);
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Sign up or log in
StackExchange.ready(function ()
StackExchange.helpers.onClickDraftSave('#login-link');
);
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Sign up using Google
Sign up using Facebook
Sign up using Email and Password