Comparing two large binary files in C

The name of the pictureThe name of the pictureThe name of the pictureClash Royale CLAN TAG#URR8PPP











up vote
4
down vote

favorite












I'm trying to read two sufficiently large binary files, comparing them and printing the offset at which they differ. I'm using fread to read the binary files and memcmp to compare them.



#include<stdio.h>
#include<time.h>

clock_t start, end;
int main(int argc, char *argv)

double cpu_time_taken;
FILE *fp1, *fp2;
printf("nArgument count: %d", argc);
printf("nFile 1 is: %s", argv[1]);
printf("nFile 2 is: %sn", argv[2]);
if (argc < 3)

printf("nInsufficient Arguments: n");
printf("nHelp:./executable <filename1> <filename2>n");
return 0;

else

fp1 = fopen(argv[1], "rb");
if (fp1 == NULL)

printf("nError in opening file %s", argv[1]);
return 0;


fp2 = fopen(argv[2], "rb");

if (fp2 == NULL)

printf("nError in opening file %s", argv[2]);
return 0;


if ((fp1 != NULL) && (fp2 != NULL))

start = clock();
compare_two_binary_files(fp1, fp2);
end = clock();
cpu_time_taken = ((double) (end - start)) / CLOCKS_PER_SEC;
printf("nTime taken to compare: %f", cpu_time_taken*1000);





int compare_two_binary_files(FILE *fp1, FILE *fp2)
!feof(fp2))
fread (tmp1, sizeof *tmp1, readsz, fp1);
fread (tmp2, sizeof *tmp2, readsz, fp2);
count += 16;
if(memcmp(tmp1, tmp2, readsz))
for(int i=0; i < readsz; i++)
printf ("%d: 0x%02x ",i, tmp1[i]);

printf("n%x", count);
return 0;












share|improve this question









New contributor




ampat is a new contributor to this site. Take care in asking for clarification, commenting, and answering.
Check out our Code of Conduct.























    up vote
    4
    down vote

    favorite












    I'm trying to read two sufficiently large binary files, comparing them and printing the offset at which they differ. I'm using fread to read the binary files and memcmp to compare them.



    #include<stdio.h>
    #include<time.h>

    clock_t start, end;
    int main(int argc, char *argv)

    double cpu_time_taken;
    FILE *fp1, *fp2;
    printf("nArgument count: %d", argc);
    printf("nFile 1 is: %s", argv[1]);
    printf("nFile 2 is: %sn", argv[2]);
    if (argc < 3)

    printf("nInsufficient Arguments: n");
    printf("nHelp:./executable <filename1> <filename2>n");
    return 0;

    else

    fp1 = fopen(argv[1], "rb");
    if (fp1 == NULL)

    printf("nError in opening file %s", argv[1]);
    return 0;


    fp2 = fopen(argv[2], "rb");

    if (fp2 == NULL)

    printf("nError in opening file %s", argv[2]);
    return 0;


    if ((fp1 != NULL) && (fp2 != NULL))

    start = clock();
    compare_two_binary_files(fp1, fp2);
    end = clock();
    cpu_time_taken = ((double) (end - start)) / CLOCKS_PER_SEC;
    printf("nTime taken to compare: %f", cpu_time_taken*1000);





    int compare_two_binary_files(FILE *fp1, FILE *fp2)
    !feof(fp2))
    fread (tmp1, sizeof *tmp1, readsz, fp1);
    fread (tmp2, sizeof *tmp2, readsz, fp2);
    count += 16;
    if(memcmp(tmp1, tmp2, readsz))
    for(int i=0; i < readsz; i++)
    printf ("%d: 0x%02x ",i, tmp1[i]);

    printf("n%x", count);
    return 0;












    share|improve this question









    New contributor




    ampat is a new contributor to this site. Take care in asking for clarification, commenting, and answering.
    Check out our Code of Conduct.





















      up vote
      4
      down vote

      favorite









      up vote
      4
      down vote

      favorite











      I'm trying to read two sufficiently large binary files, comparing them and printing the offset at which they differ. I'm using fread to read the binary files and memcmp to compare them.



      #include<stdio.h>
      #include<time.h>

      clock_t start, end;
      int main(int argc, char *argv)

      double cpu_time_taken;
      FILE *fp1, *fp2;
      printf("nArgument count: %d", argc);
      printf("nFile 1 is: %s", argv[1]);
      printf("nFile 2 is: %sn", argv[2]);
      if (argc < 3)

      printf("nInsufficient Arguments: n");
      printf("nHelp:./executable <filename1> <filename2>n");
      return 0;

      else

      fp1 = fopen(argv[1], "rb");
      if (fp1 == NULL)

      printf("nError in opening file %s", argv[1]);
      return 0;


      fp2 = fopen(argv[2], "rb");

      if (fp2 == NULL)

      printf("nError in opening file %s", argv[2]);
      return 0;


      if ((fp1 != NULL) && (fp2 != NULL))

      start = clock();
      compare_two_binary_files(fp1, fp2);
      end = clock();
      cpu_time_taken = ((double) (end - start)) / CLOCKS_PER_SEC;
      printf("nTime taken to compare: %f", cpu_time_taken*1000);





      int compare_two_binary_files(FILE *fp1, FILE *fp2)
      !feof(fp2))
      fread (tmp1, sizeof *tmp1, readsz, fp1);
      fread (tmp2, sizeof *tmp2, readsz, fp2);
      count += 16;
      if(memcmp(tmp1, tmp2, readsz))
      for(int i=0; i < readsz; i++)
      printf ("%d: 0x%02x ",i, tmp1[i]);

      printf("n%x", count);
      return 0;












      share|improve this question









      New contributor




      ampat is a new contributor to this site. Take care in asking for clarification, commenting, and answering.
      Check out our Code of Conduct.











      I'm trying to read two sufficiently large binary files, comparing them and printing the offset at which they differ. I'm using fread to read the binary files and memcmp to compare them.



      #include<stdio.h>
      #include<time.h>

      clock_t start, end;
      int main(int argc, char *argv)

      double cpu_time_taken;
      FILE *fp1, *fp2;
      printf("nArgument count: %d", argc);
      printf("nFile 1 is: %s", argv[1]);
      printf("nFile 2 is: %sn", argv[2]);
      if (argc < 3)

      printf("nInsufficient Arguments: n");
      printf("nHelp:./executable <filename1> <filename2>n");
      return 0;

      else

      fp1 = fopen(argv[1], "rb");
      if (fp1 == NULL)

      printf("nError in opening file %s", argv[1]);
      return 0;


      fp2 = fopen(argv[2], "rb");

      if (fp2 == NULL)

      printf("nError in opening file %s", argv[2]);
      return 0;


      if ((fp1 != NULL) && (fp2 != NULL))

      start = clock();
      compare_two_binary_files(fp1, fp2);
      end = clock();
      cpu_time_taken = ((double) (end - start)) / CLOCKS_PER_SEC;
      printf("nTime taken to compare: %f", cpu_time_taken*1000);





      int compare_two_binary_files(FILE *fp1, FILE *fp2)
      !feof(fp2))
      fread (tmp1, sizeof *tmp1, readsz, fp1);
      fread (tmp2, sizeof *tmp2, readsz, fp2);
      count += 16;
      if(memcmp(tmp1, tmp2, readsz))
      for(int i=0; i < readsz; i++)
      printf ("%d: 0x%02x ",i, tmp1[i]);

      printf("n%x", count);
      return 0;









      performance c file






      share|improve this question









      New contributor




      ampat is a new contributor to this site. Take care in asking for clarification, commenting, and answering.
      Check out our Code of Conduct.











      share|improve this question









      New contributor




      ampat is a new contributor to this site. Take care in asking for clarification, commenting, and answering.
      Check out our Code of Conduct.









      share|improve this question




      share|improve this question








      edited 2 hours ago









      200_success

      125k14144402




      125k14144402






      New contributor




      ampat is a new contributor to this site. Take care in asking for clarification, commenting, and answering.
      Check out our Code of Conduct.









      asked 2 hours ago









      ampat

      211




      211




      New contributor




      ampat is a new contributor to this site. Take care in asking for clarification, commenting, and answering.
      Check out our Code of Conduct.





      New contributor





      ampat is a new contributor to this site. Take care in asking for clarification, commenting, and answering.
      Check out our Code of Conduct.






      ampat is a new contributor to this site. Take care in asking for clarification, commenting, and answering.
      Check out our Code of Conduct.




















          4 Answers
          4






          active

          oldest

          votes

















          up vote
          2
          down vote














          fread (tmp1, sizeof *tmp1, readsz, fp1);
          fread (tmp2, sizeof *tmp2, readsz, fp2);
          count += 16;
          if(memcmp(tmp1, tmp2, readsz))
          …




          You are discarding the return values of the fread() calls, blindly assuming that they both successfully read 16 bytes.



          It is unclear what the return value of compare_two_binary_files(…, …) means. In fact, you sometimes don't return a value at all. Your compiler should have warned you about that problem.



          File I/O should be done a block at a time: 512 bytes, 1024 bytes, or 2048 bytes would be a more reasonable chunk size than 16.



          Technically, most of the time is spent waiting for I/O. cpu_time_taken is a misnomer: what you're measuring is called "wall clock time".






          share|improve this answer



























            up vote
            1
            down vote













            • if ((fp1 != NULL) && (fp2 != NULL)) test is redundant. If one of them happened to be NULL, the program is already terminated.


            • Don't print error messages to the stdout. There is stderr for that purpose.


            • When printing to stdout, keep in mind that it is usually line buffered. The text stays in the internal buffer until a newline is printed. That's why it is important to print a newline after the message, not before.



            • printf("nError in opening file %s", argv[1]); doesn't tell the most important part: why did fopen fail. Print strerror(errno) as well. Combining the above bullets,



               fprintf(stderr, "Error in opening file %s: %sn", argv[1], strerror(errno));


              You'd need to #include <errno.h> for that.







            share|improve this answer




















            • "why it is important to print a newline after the message" is not necessarily sufficient. What are the rules of automatic flushing stdout buffer in C?. Consider fflush(stdout);.
              – chux
              1 hour ago











            • I like the idea of of informative error messages, yet printing a faulty file name is sometimes itself a problem. At a minimum, it is useful to include sententials to demarcate the iffy file name. file %s --> file "%s".
              – chux
              32 secs ago

















            up vote
            1
            down vote














            I'm trying to read two sufficiently large binary files, comparing them and printing the offset at which they differ.




            compare_two_binary_files() is faulty for various reasons.



            1. The offset in which they differ can readily exceed the int range. Recommend the widest integer available instead. File sizes are not limited by the integer types available, yet uintmax_t is a very reasonable assumption of being sufficient for a file size.


            2. Checking feof() only ignores the possibility of a rare input error messing up the compare.


            3. compare_two_binary_files() can return without a returning a value. A compiler warning should have occurred. Enable all warnings or use a better compiler.


            4. Ignoring the return value of fread() is wrong.


            5. if(memcmp(tmp1, tmp2, readsz)){ is questionable. The if() is true when the buffer differ.


            Suggested alternative:



            // Use something more generous than 16. Maybe even 4096 or 64k and allocate buffers
            #define CMP_N 256

            // Return value:
            // 0: files compare equal in content and length, fp1 size saved as offset
            // 1: files differ, fp1 longer, fp2 size saved as offset
            // 2: files differ, fp2 longer, fp1 size saved as offset
            // 3: files differ at offset
            // -1: fp1 trouble reading. Unspecified data in offset
            // -2: fp2 trouble reading. Unspecified data in offset
            int compare_two_binary_files_alternate(FILE *fp1, FILE *fp2, uintmax_t *offset)
            char tmp1[CMP_N], tmp2[CMP_N];
            size_t n1, n2;

            rewind(fp1); // start at beginning and clear error
            rewind(fp2);
            *offset = 0;
            do
            n1 = fread(tmp1, sizeof *tmp1, sizeof tmp1 / sizeof *tmp1, fp1);
            if (n1 == 0 && ferror(fp1))
            return -1;

            n2 = fread(tmp2, sizeof *tmp2, sizeof tmp2 / sizeof *tmp2, fp2);
            if (n2 == 0 && ferror(fp2))
            return -2;

            size_t n_min = n1 < n2 ? n1 : n2;
            if (memcmp(tmp1, tmp2, n_min)) // Quickly find if file contents differ ...
            for (size_t i = 0; i < n_min; i++) // Slowly find where they differ
            if (tmp1[i] != tmp2[i])
            *offset += i;
            return 3;



            *offset += n_min;
            if (n1 > n_min)
            return 1;

            if (n2 > n_min)
            return 2;

            while (n1);
            return 0;






            share|improve this answer





























              up vote
              0
              down vote













              In addition to what 200_success said:



              1. "rb" is not a valid fopen flag. It most likely has the same effect as "r"


              2. the return value of memcmp() is zero if the two are equal, so i am not sure what the print loop is doing, especially because it exits if the first 16 bytes are the same.


              3. in general try not to print in calculation functions, instead use the return value to indicate a result.


              4. 16 is an arbitrary and very small block size to read. consider making it larger and storing it as a constant, rather than retyping it each time.






              share|improve this answer
















              • 2




                Disagree with ""rb" is not a valid fopen flag." The C spec has "rb open binary file for reading".
                – chux
                1 hour ago










              Your Answer




              StackExchange.ifUsing("editor", function ()
              return StackExchange.using("mathjaxEditing", function ()
              StackExchange.MarkdownEditor.creationCallbacks.add(function (editor, postfix)
              StackExchange.mathjaxEditing.prepareWmdForMathJax(editor, postfix, [["\$", "\$"]]);
              );
              );
              , "mathjax-editing");

              StackExchange.ifUsing("editor", function ()
              StackExchange.using("externalEditor", function ()
              StackExchange.using("snippets", function ()
              StackExchange.snippets.init();
              );
              );
              , "code-snippets");

              StackExchange.ready(function()
              var channelOptions =
              tags: "".split(" "),
              id: "196"
              ;
              initTagRenderer("".split(" "), "".split(" "), channelOptions);

              StackExchange.using("externalEditor", function()
              // Have to fire editor after snippets, if snippets enabled
              if (StackExchange.settings.snippets.snippetsEnabled)
              StackExchange.using("snippets", function()
              createEditor();
              );

              else
              createEditor();

              );

              function createEditor()
              StackExchange.prepareEditor(
              heartbeatType: 'answer',
              convertImagesToLinks: false,
              noModals: false,
              showLowRepImageUploadWarning: true,
              reputationToPostImages: null,
              bindNavPrevention: true,
              postfix: "",
              onDemand: true,
              discardSelector: ".discard-answer"
              ,immediatelyShowMarkdownHelp:true
              );



              );






              ampat is a new contributor. Be nice, and check out our Code of Conduct.









               

              draft saved


              draft discarded


















              StackExchange.ready(
              function ()
              StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2fcodereview.stackexchange.com%2fquestions%2f204478%2fcomparing-two-large-binary-files-in-c%23new-answer', 'question_page');

              );

              Post as a guest






























              4 Answers
              4






              active

              oldest

              votes








              4 Answers
              4






              active

              oldest

              votes









              active

              oldest

              votes






              active

              oldest

              votes








              up vote
              2
              down vote














              fread (tmp1, sizeof *tmp1, readsz, fp1);
              fread (tmp2, sizeof *tmp2, readsz, fp2);
              count += 16;
              if(memcmp(tmp1, tmp2, readsz))
              …




              You are discarding the return values of the fread() calls, blindly assuming that they both successfully read 16 bytes.



              It is unclear what the return value of compare_two_binary_files(…, …) means. In fact, you sometimes don't return a value at all. Your compiler should have warned you about that problem.



              File I/O should be done a block at a time: 512 bytes, 1024 bytes, or 2048 bytes would be a more reasonable chunk size than 16.



              Technically, most of the time is spent waiting for I/O. cpu_time_taken is a misnomer: what you're measuring is called "wall clock time".






              share|improve this answer
























                up vote
                2
                down vote














                fread (tmp1, sizeof *tmp1, readsz, fp1);
                fread (tmp2, sizeof *tmp2, readsz, fp2);
                count += 16;
                if(memcmp(tmp1, tmp2, readsz))
                …




                You are discarding the return values of the fread() calls, blindly assuming that they both successfully read 16 bytes.



                It is unclear what the return value of compare_two_binary_files(…, …) means. In fact, you sometimes don't return a value at all. Your compiler should have warned you about that problem.



                File I/O should be done a block at a time: 512 bytes, 1024 bytes, or 2048 bytes would be a more reasonable chunk size than 16.



                Technically, most of the time is spent waiting for I/O. cpu_time_taken is a misnomer: what you're measuring is called "wall clock time".






                share|improve this answer






















                  up vote
                  2
                  down vote










                  up vote
                  2
                  down vote










                  fread (tmp1, sizeof *tmp1, readsz, fp1);
                  fread (tmp2, sizeof *tmp2, readsz, fp2);
                  count += 16;
                  if(memcmp(tmp1, tmp2, readsz))
                  …




                  You are discarding the return values of the fread() calls, blindly assuming that they both successfully read 16 bytes.



                  It is unclear what the return value of compare_two_binary_files(…, …) means. In fact, you sometimes don't return a value at all. Your compiler should have warned you about that problem.



                  File I/O should be done a block at a time: 512 bytes, 1024 bytes, or 2048 bytes would be a more reasonable chunk size than 16.



                  Technically, most of the time is spent waiting for I/O. cpu_time_taken is a misnomer: what you're measuring is called "wall clock time".






                  share|improve this answer













                  fread (tmp1, sizeof *tmp1, readsz, fp1);
                  fread (tmp2, sizeof *tmp2, readsz, fp2);
                  count += 16;
                  if(memcmp(tmp1, tmp2, readsz))
                  …




                  You are discarding the return values of the fread() calls, blindly assuming that they both successfully read 16 bytes.



                  It is unclear what the return value of compare_two_binary_files(…, …) means. In fact, you sometimes don't return a value at all. Your compiler should have warned you about that problem.



                  File I/O should be done a block at a time: 512 bytes, 1024 bytes, or 2048 bytes would be a more reasonable chunk size than 16.



                  Technically, most of the time is spent waiting for I/O. cpu_time_taken is a misnomer: what you're measuring is called "wall clock time".







                  share|improve this answer












                  share|improve this answer



                  share|improve this answer










                  answered 1 hour ago









                  200_success

                  125k14144402




                  125k14144402






















                      up vote
                      1
                      down vote













                      • if ((fp1 != NULL) && (fp2 != NULL)) test is redundant. If one of them happened to be NULL, the program is already terminated.


                      • Don't print error messages to the stdout. There is stderr for that purpose.


                      • When printing to stdout, keep in mind that it is usually line buffered. The text stays in the internal buffer until a newline is printed. That's why it is important to print a newline after the message, not before.



                      • printf("nError in opening file %s", argv[1]); doesn't tell the most important part: why did fopen fail. Print strerror(errno) as well. Combining the above bullets,



                         fprintf(stderr, "Error in opening file %s: %sn", argv[1], strerror(errno));


                        You'd need to #include <errno.h> for that.







                      share|improve this answer




















                      • "why it is important to print a newline after the message" is not necessarily sufficient. What are the rules of automatic flushing stdout buffer in C?. Consider fflush(stdout);.
                        – chux
                        1 hour ago











                      • I like the idea of of informative error messages, yet printing a faulty file name is sometimes itself a problem. At a minimum, it is useful to include sententials to demarcate the iffy file name. file %s --> file "%s".
                        – chux
                        32 secs ago














                      up vote
                      1
                      down vote













                      • if ((fp1 != NULL) && (fp2 != NULL)) test is redundant. If one of them happened to be NULL, the program is already terminated.


                      • Don't print error messages to the stdout. There is stderr for that purpose.


                      • When printing to stdout, keep in mind that it is usually line buffered. The text stays in the internal buffer until a newline is printed. That's why it is important to print a newline after the message, not before.



                      • printf("nError in opening file %s", argv[1]); doesn't tell the most important part: why did fopen fail. Print strerror(errno) as well. Combining the above bullets,



                         fprintf(stderr, "Error in opening file %s: %sn", argv[1], strerror(errno));


                        You'd need to #include <errno.h> for that.







                      share|improve this answer




















                      • "why it is important to print a newline after the message" is not necessarily sufficient. What are the rules of automatic flushing stdout buffer in C?. Consider fflush(stdout);.
                        – chux
                        1 hour ago











                      • I like the idea of of informative error messages, yet printing a faulty file name is sometimes itself a problem. At a minimum, it is useful to include sententials to demarcate the iffy file name. file %s --> file "%s".
                        – chux
                        32 secs ago












                      up vote
                      1
                      down vote










                      up vote
                      1
                      down vote









                      • if ((fp1 != NULL) && (fp2 != NULL)) test is redundant. If one of them happened to be NULL, the program is already terminated.


                      • Don't print error messages to the stdout. There is stderr for that purpose.


                      • When printing to stdout, keep in mind that it is usually line buffered. The text stays in the internal buffer until a newline is printed. That's why it is important to print a newline after the message, not before.



                      • printf("nError in opening file %s", argv[1]); doesn't tell the most important part: why did fopen fail. Print strerror(errno) as well. Combining the above bullets,



                         fprintf(stderr, "Error in opening file %s: %sn", argv[1], strerror(errno));


                        You'd need to #include <errno.h> for that.







                      share|improve this answer












                      • if ((fp1 != NULL) && (fp2 != NULL)) test is redundant. If one of them happened to be NULL, the program is already terminated.


                      • Don't print error messages to the stdout. There is stderr for that purpose.


                      • When printing to stdout, keep in mind that it is usually line buffered. The text stays in the internal buffer until a newline is printed. That's why it is important to print a newline after the message, not before.



                      • printf("nError in opening file %s", argv[1]); doesn't tell the most important part: why did fopen fail. Print strerror(errno) as well. Combining the above bullets,



                         fprintf(stderr, "Error in opening file %s: %sn", argv[1], strerror(errno));


                        You'd need to #include <errno.h> for that.








                      share|improve this answer












                      share|improve this answer



                      share|improve this answer










                      answered 1 hour ago









                      vnp

                      37k12992




                      37k12992











                      • "why it is important to print a newline after the message" is not necessarily sufficient. What are the rules of automatic flushing stdout buffer in C?. Consider fflush(stdout);.
                        – chux
                        1 hour ago











                      • I like the idea of of informative error messages, yet printing a faulty file name is sometimes itself a problem. At a minimum, it is useful to include sententials to demarcate the iffy file name. file %s --> file "%s".
                        – chux
                        32 secs ago
















                      • "why it is important to print a newline after the message" is not necessarily sufficient. What are the rules of automatic flushing stdout buffer in C?. Consider fflush(stdout);.
                        – chux
                        1 hour ago











                      • I like the idea of of informative error messages, yet printing a faulty file name is sometimes itself a problem. At a minimum, it is useful to include sententials to demarcate the iffy file name. file %s --> file "%s".
                        – chux
                        32 secs ago















                      "why it is important to print a newline after the message" is not necessarily sufficient. What are the rules of automatic flushing stdout buffer in C?. Consider fflush(stdout);.
                      – chux
                      1 hour ago





                      "why it is important to print a newline after the message" is not necessarily sufficient. What are the rules of automatic flushing stdout buffer in C?. Consider fflush(stdout);.
                      – chux
                      1 hour ago













                      I like the idea of of informative error messages, yet printing a faulty file name is sometimes itself a problem. At a minimum, it is useful to include sententials to demarcate the iffy file name. file %s --> file "%s".
                      – chux
                      32 secs ago




                      I like the idea of of informative error messages, yet printing a faulty file name is sometimes itself a problem. At a minimum, it is useful to include sententials to demarcate the iffy file name. file %s --> file "%s".
                      – chux
                      32 secs ago










                      up vote
                      1
                      down vote














                      I'm trying to read two sufficiently large binary files, comparing them and printing the offset at which they differ.




                      compare_two_binary_files() is faulty for various reasons.



                      1. The offset in which they differ can readily exceed the int range. Recommend the widest integer available instead. File sizes are not limited by the integer types available, yet uintmax_t is a very reasonable assumption of being sufficient for a file size.


                      2. Checking feof() only ignores the possibility of a rare input error messing up the compare.


                      3. compare_two_binary_files() can return without a returning a value. A compiler warning should have occurred. Enable all warnings or use a better compiler.


                      4. Ignoring the return value of fread() is wrong.


                      5. if(memcmp(tmp1, tmp2, readsz)){ is questionable. The if() is true when the buffer differ.


                      Suggested alternative:



                      // Use something more generous than 16. Maybe even 4096 or 64k and allocate buffers
                      #define CMP_N 256

                      // Return value:
                      // 0: files compare equal in content and length, fp1 size saved as offset
                      // 1: files differ, fp1 longer, fp2 size saved as offset
                      // 2: files differ, fp2 longer, fp1 size saved as offset
                      // 3: files differ at offset
                      // -1: fp1 trouble reading. Unspecified data in offset
                      // -2: fp2 trouble reading. Unspecified data in offset
                      int compare_two_binary_files_alternate(FILE *fp1, FILE *fp2, uintmax_t *offset)
                      char tmp1[CMP_N], tmp2[CMP_N];
                      size_t n1, n2;

                      rewind(fp1); // start at beginning and clear error
                      rewind(fp2);
                      *offset = 0;
                      do
                      n1 = fread(tmp1, sizeof *tmp1, sizeof tmp1 / sizeof *tmp1, fp1);
                      if (n1 == 0 && ferror(fp1))
                      return -1;

                      n2 = fread(tmp2, sizeof *tmp2, sizeof tmp2 / sizeof *tmp2, fp2);
                      if (n2 == 0 && ferror(fp2))
                      return -2;

                      size_t n_min = n1 < n2 ? n1 : n2;
                      if (memcmp(tmp1, tmp2, n_min)) // Quickly find if file contents differ ...
                      for (size_t i = 0; i < n_min; i++) // Slowly find where they differ
                      if (tmp1[i] != tmp2[i])
                      *offset += i;
                      return 3;



                      *offset += n_min;
                      if (n1 > n_min)
                      return 1;

                      if (n2 > n_min)
                      return 2;

                      while (n1);
                      return 0;






                      share|improve this answer


























                        up vote
                        1
                        down vote














                        I'm trying to read two sufficiently large binary files, comparing them and printing the offset at which they differ.




                        compare_two_binary_files() is faulty for various reasons.



                        1. The offset in which they differ can readily exceed the int range. Recommend the widest integer available instead. File sizes are not limited by the integer types available, yet uintmax_t is a very reasonable assumption of being sufficient for a file size.


                        2. Checking feof() only ignores the possibility of a rare input error messing up the compare.


                        3. compare_two_binary_files() can return without a returning a value. A compiler warning should have occurred. Enable all warnings or use a better compiler.


                        4. Ignoring the return value of fread() is wrong.


                        5. if(memcmp(tmp1, tmp2, readsz)){ is questionable. The if() is true when the buffer differ.


                        Suggested alternative:



                        // Use something more generous than 16. Maybe even 4096 or 64k and allocate buffers
                        #define CMP_N 256

                        // Return value:
                        // 0: files compare equal in content and length, fp1 size saved as offset
                        // 1: files differ, fp1 longer, fp2 size saved as offset
                        // 2: files differ, fp2 longer, fp1 size saved as offset
                        // 3: files differ at offset
                        // -1: fp1 trouble reading. Unspecified data in offset
                        // -2: fp2 trouble reading. Unspecified data in offset
                        int compare_two_binary_files_alternate(FILE *fp1, FILE *fp2, uintmax_t *offset)
                        char tmp1[CMP_N], tmp2[CMP_N];
                        size_t n1, n2;

                        rewind(fp1); // start at beginning and clear error
                        rewind(fp2);
                        *offset = 0;
                        do
                        n1 = fread(tmp1, sizeof *tmp1, sizeof tmp1 / sizeof *tmp1, fp1);
                        if (n1 == 0 && ferror(fp1))
                        return -1;

                        n2 = fread(tmp2, sizeof *tmp2, sizeof tmp2 / sizeof *tmp2, fp2);
                        if (n2 == 0 && ferror(fp2))
                        return -2;

                        size_t n_min = n1 < n2 ? n1 : n2;
                        if (memcmp(tmp1, tmp2, n_min)) // Quickly find if file contents differ ...
                        for (size_t i = 0; i < n_min; i++) // Slowly find where they differ
                        if (tmp1[i] != tmp2[i])
                        *offset += i;
                        return 3;



                        *offset += n_min;
                        if (n1 > n_min)
                        return 1;

                        if (n2 > n_min)
                        return 2;

                        while (n1);
                        return 0;






                        share|improve this answer
























                          up vote
                          1
                          down vote










                          up vote
                          1
                          down vote










                          I'm trying to read two sufficiently large binary files, comparing them and printing the offset at which they differ.




                          compare_two_binary_files() is faulty for various reasons.



                          1. The offset in which they differ can readily exceed the int range. Recommend the widest integer available instead. File sizes are not limited by the integer types available, yet uintmax_t is a very reasonable assumption of being sufficient for a file size.


                          2. Checking feof() only ignores the possibility of a rare input error messing up the compare.


                          3. compare_two_binary_files() can return without a returning a value. A compiler warning should have occurred. Enable all warnings or use a better compiler.


                          4. Ignoring the return value of fread() is wrong.


                          5. if(memcmp(tmp1, tmp2, readsz)){ is questionable. The if() is true when the buffer differ.


                          Suggested alternative:



                          // Use something more generous than 16. Maybe even 4096 or 64k and allocate buffers
                          #define CMP_N 256

                          // Return value:
                          // 0: files compare equal in content and length, fp1 size saved as offset
                          // 1: files differ, fp1 longer, fp2 size saved as offset
                          // 2: files differ, fp2 longer, fp1 size saved as offset
                          // 3: files differ at offset
                          // -1: fp1 trouble reading. Unspecified data in offset
                          // -2: fp2 trouble reading. Unspecified data in offset
                          int compare_two_binary_files_alternate(FILE *fp1, FILE *fp2, uintmax_t *offset)
                          char tmp1[CMP_N], tmp2[CMP_N];
                          size_t n1, n2;

                          rewind(fp1); // start at beginning and clear error
                          rewind(fp2);
                          *offset = 0;
                          do
                          n1 = fread(tmp1, sizeof *tmp1, sizeof tmp1 / sizeof *tmp1, fp1);
                          if (n1 == 0 && ferror(fp1))
                          return -1;

                          n2 = fread(tmp2, sizeof *tmp2, sizeof tmp2 / sizeof *tmp2, fp2);
                          if (n2 == 0 && ferror(fp2))
                          return -2;

                          size_t n_min = n1 < n2 ? n1 : n2;
                          if (memcmp(tmp1, tmp2, n_min)) // Quickly find if file contents differ ...
                          for (size_t i = 0; i < n_min; i++) // Slowly find where they differ
                          if (tmp1[i] != tmp2[i])
                          *offset += i;
                          return 3;



                          *offset += n_min;
                          if (n1 > n_min)
                          return 1;

                          if (n2 > n_min)
                          return 2;

                          while (n1);
                          return 0;






                          share|improve this answer















                          I'm trying to read two sufficiently large binary files, comparing them and printing the offset at which they differ.




                          compare_two_binary_files() is faulty for various reasons.



                          1. The offset in which they differ can readily exceed the int range. Recommend the widest integer available instead. File sizes are not limited by the integer types available, yet uintmax_t is a very reasonable assumption of being sufficient for a file size.


                          2. Checking feof() only ignores the possibility of a rare input error messing up the compare.


                          3. compare_two_binary_files() can return without a returning a value. A compiler warning should have occurred. Enable all warnings or use a better compiler.


                          4. Ignoring the return value of fread() is wrong.


                          5. if(memcmp(tmp1, tmp2, readsz)){ is questionable. The if() is true when the buffer differ.


                          Suggested alternative:



                          // Use something more generous than 16. Maybe even 4096 or 64k and allocate buffers
                          #define CMP_N 256

                          // Return value:
                          // 0: files compare equal in content and length, fp1 size saved as offset
                          // 1: files differ, fp1 longer, fp2 size saved as offset
                          // 2: files differ, fp2 longer, fp1 size saved as offset
                          // 3: files differ at offset
                          // -1: fp1 trouble reading. Unspecified data in offset
                          // -2: fp2 trouble reading. Unspecified data in offset
                          int compare_two_binary_files_alternate(FILE *fp1, FILE *fp2, uintmax_t *offset)
                          char tmp1[CMP_N], tmp2[CMP_N];
                          size_t n1, n2;

                          rewind(fp1); // start at beginning and clear error
                          rewind(fp2);
                          *offset = 0;
                          do
                          n1 = fread(tmp1, sizeof *tmp1, sizeof tmp1 / sizeof *tmp1, fp1);
                          if (n1 == 0 && ferror(fp1))
                          return -1;

                          n2 = fread(tmp2, sizeof *tmp2, sizeof tmp2 / sizeof *tmp2, fp2);
                          if (n2 == 0 && ferror(fp2))
                          return -2;

                          size_t n_min = n1 < n2 ? n1 : n2;
                          if (memcmp(tmp1, tmp2, n_min)) // Quickly find if file contents differ ...
                          for (size_t i = 0; i < n_min; i++) // Slowly find where they differ
                          if (tmp1[i] != tmp2[i])
                          *offset += i;
                          return 3;



                          *offset += n_min;
                          if (n1 > n_min)
                          return 1;

                          if (n2 > n_min)
                          return 2;

                          while (n1);
                          return 0;







                          share|improve this answer














                          share|improve this answer



                          share|improve this answer








                          edited 12 mins ago

























                          answered 27 mins ago









                          chux

                          11.7k11339




                          11.7k11339




















                              up vote
                              0
                              down vote













                              In addition to what 200_success said:



                              1. "rb" is not a valid fopen flag. It most likely has the same effect as "r"


                              2. the return value of memcmp() is zero if the two are equal, so i am not sure what the print loop is doing, especially because it exits if the first 16 bytes are the same.


                              3. in general try not to print in calculation functions, instead use the return value to indicate a result.


                              4. 16 is an arbitrary and very small block size to read. consider making it larger and storing it as a constant, rather than retyping it each time.






                              share|improve this answer
















                              • 2




                                Disagree with ""rb" is not a valid fopen flag." The C spec has "rb open binary file for reading".
                                – chux
                                1 hour ago














                              up vote
                              0
                              down vote













                              In addition to what 200_success said:



                              1. "rb" is not a valid fopen flag. It most likely has the same effect as "r"


                              2. the return value of memcmp() is zero if the two are equal, so i am not sure what the print loop is doing, especially because it exits if the first 16 bytes are the same.


                              3. in general try not to print in calculation functions, instead use the return value to indicate a result.


                              4. 16 is an arbitrary and very small block size to read. consider making it larger and storing it as a constant, rather than retyping it each time.






                              share|improve this answer
















                              • 2




                                Disagree with ""rb" is not a valid fopen flag." The C spec has "rb open binary file for reading".
                                – chux
                                1 hour ago












                              up vote
                              0
                              down vote










                              up vote
                              0
                              down vote









                              In addition to what 200_success said:



                              1. "rb" is not a valid fopen flag. It most likely has the same effect as "r"


                              2. the return value of memcmp() is zero if the two are equal, so i am not sure what the print loop is doing, especially because it exits if the first 16 bytes are the same.


                              3. in general try not to print in calculation functions, instead use the return value to indicate a result.


                              4. 16 is an arbitrary and very small block size to read. consider making it larger and storing it as a constant, rather than retyping it each time.






                              share|improve this answer












                              In addition to what 200_success said:



                              1. "rb" is not a valid fopen flag. It most likely has the same effect as "r"


                              2. the return value of memcmp() is zero if the two are equal, so i am not sure what the print loop is doing, especially because it exits if the first 16 bytes are the same.


                              3. in general try not to print in calculation functions, instead use the return value to indicate a result.


                              4. 16 is an arbitrary and very small block size to read. consider making it larger and storing it as a constant, rather than retyping it each time.







                              share|improve this answer












                              share|improve this answer



                              share|improve this answer










                              answered 1 hour ago









                              Peter

                              65812




                              65812







                              • 2




                                Disagree with ""rb" is not a valid fopen flag." The C spec has "rb open binary file for reading".
                                – chux
                                1 hour ago












                              • 2




                                Disagree with ""rb" is not a valid fopen flag." The C spec has "rb open binary file for reading".
                                – chux
                                1 hour ago







                              2




                              2




                              Disagree with ""rb" is not a valid fopen flag." The C spec has "rb open binary file for reading".
                              – chux
                              1 hour ago




                              Disagree with ""rb" is not a valid fopen flag." The C spec has "rb open binary file for reading".
                              – chux
                              1 hour ago










                              ampat is a new contributor. Be nice, and check out our Code of Conduct.









                               

                              draft saved


                              draft discarded


















                              ampat is a new contributor. Be nice, and check out our Code of Conduct.












                              ampat is a new contributor. Be nice, and check out our Code of Conduct.











                              ampat is a new contributor. Be nice, and check out our Code of Conduct.













                               


                              draft saved


                              draft discarded














                              StackExchange.ready(
                              function ()
                              StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2fcodereview.stackexchange.com%2fquestions%2f204478%2fcomparing-two-large-binary-files-in-c%23new-answer', 'question_page');

                              );

                              Post as a guest













































































                              Comments

                              Popular posts from this blog

                              What does second last employer means? [closed]

                              Installing NextGIS Connect into QGIS 3?

                              List of Gilmore Girls characters