From 14c90f733524a4f9142559f87584f377461fe4ef Mon Sep 17 00:00:00 2001 From: dwheeler Date: Tue, 16 Jan 2007 02:44:45 +0000 Subject: [PATCH] Initial import git-svn-id: svn+ssh://svn.code.sf.net/p/flawfinder/code/trunk@1 5c01084b-1f27-0410-9f85-80411afe95dc --- COPYING | 340 +++++++++ ChangeLog | 529 ++++++++++++++ INSTALL.txt | 65 ++ MANIFEST.in | 14 + README | 7 + announcement | 29 + correct-results.html | 270 +++++++ correct-results.txt | 139 ++++ flaw-defect-report | 114 +++ flawfinder | 1664 ++++++++++++++++++++++++++++++++++++++++++ flawfinder.1 | 737 +++++++++++++++++++ flawfinder.1.gz | Bin 0 -> 11039 bytes flawfinder.pdf | Bin 0 -> 59631 bytes flawfinder.ps | 1464 +++++++++++++++++++++++++++++++++++++ flawfinder.spec | 46 ++ flawtest.c | 26 + junk.c | 9 + makefile | 150 ++++ setup.cfg | 7 + setup.py | 41 ++ sloctest.c | 9 + test-results.html | 270 +++++++ test-results.txt | 139 ++++ test.c | 117 +++ test2.c | 1 + 25 files changed, 6187 insertions(+) create mode 100644 COPYING create mode 100644 ChangeLog create mode 100644 INSTALL.txt create mode 100644 MANIFEST.in create mode 100644 README create mode 100644 announcement create mode 100644 correct-results.html create mode 100644 correct-results.txt create mode 100644 flaw-defect-report create mode 100755 flawfinder create mode 100644 flawfinder.1 create mode 100644 flawfinder.1.gz create mode 100644 flawfinder.pdf create mode 100644 flawfinder.ps create mode 100644 flawfinder.spec create mode 100644 flawtest.c create mode 100644 junk.c create mode 100644 makefile create mode 100644 setup.cfg create mode 100644 setup.py create mode 100644 sloctest.c create mode 100644 test-results.html create mode 100644 test-results.txt create mode 100644 test.c create mode 100644 test2.c diff --git a/COPYING b/COPYING new file mode 100644 index 0000000..60549be --- /dev/null +++ b/COPYING @@ -0,0 +1,340 @@ + GNU GENERAL PUBLIC LICENSE + Version 2, June 1991 + + Copyright (C) 1989, 1991 Free Software Foundation, Inc. + 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +License is intended to guarantee your freedom to share and change free +software--to make sure the software is free for all its users. This +General Public License applies to most of the Free Software +Foundation's software and to any other program whose authors commit to +using it. (Some other Free Software Foundation software is covered by +the GNU Library General Public License instead.) You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +this service if you wish), that you receive source code or can get it +if you want it, that you can change the software or use pieces of it +in new free programs; and that you know you can do these things. + + To protect your rights, we need to make restrictions that forbid +anyone to deny you these rights or to ask you to surrender the rights. +These restrictions translate to certain responsibilities for you if you +distribute copies of the software, or if you modify it. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must give the recipients all the rights that +you have. You must make sure that they, too, receive or can get the +source code. And you must show them these terms so they know their +rights. + + We protect your rights with two steps: (1) copyright the software, and +(2) offer you this license which gives you legal permission to copy, +distribute and/or modify the software. + + Also, for each author's protection and ours, we want to make certain +that everyone understands that there is no warranty for this free +software. If the software is modified by someone else and passed on, we +want its recipients to know that what they have is not the original, so +that any problems introduced by others will not reflect on the original +authors' reputations. + + Finally, any free program is threatened constantly by software +patents. We wish to avoid the danger that redistributors of a free +program will individually obtain patent licenses, in effect making the +program proprietary. To prevent this, we have made it clear that any +patent must be licensed for everyone's free use or not licensed at all. + + The precise terms and conditions for copying, distribution and +modification follow. + + GNU GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License applies to any program or other work which contains +a notice placed by the copyright holder saying it may be distributed +under the terms of this General Public License. The "Program", below, +refers to any such program or work, and a "work based on the Program" +means either the Program or any derivative work under copyright law: +that is to say, a work containing the Program or a portion of it, +either verbatim or with modifications and/or translated into another +language. (Hereinafter, translation is included without limitation in +the term "modification".) Each licensee is addressed as "you". + +Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. The act of +running the Program is not restricted, and the output from the Program +is covered only if its contents constitute a work based on the +Program (independent of having been made by running the Program). +Whether that is true depends on what the Program does. + + 1. You may copy and distribute verbatim copies of the Program's +source code as you receive it, in any medium, provided that you +conspicuously and appropriately publish on each copy an appropriate +copyright notice and disclaimer of warranty; keep intact all the +notices that refer to this License and to the absence of any warranty; +and give any other recipients of the Program a copy of this License +along with the Program. + +You may charge a fee for the physical act of transferring a copy, and +you may at your option offer warranty protection in exchange for a fee. + + 2. You may modify your copy or copies of the Program or any portion +of it, thus forming a work based on the Program, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) You must cause the modified files to carry prominent notices + stating that you changed the files and the date of any change. + + b) You must cause any work that you distribute or publish, that in + whole or in part contains or is derived from the Program or any + part thereof, to be licensed as a whole at no charge to all third + parties under the terms of this License. + + c) If the modified program normally reads commands interactively + when run, you must cause it, when started running for such + interactive use in the most ordinary way, to print or display an + announcement including an appropriate copyright notice and a + notice that there is no warranty (or else, saying that you provide + a warranty) and that users may redistribute the program under + these conditions, and telling the user how to view a copy of this + License. (Exception: if the Program itself is interactive but + does not normally print such an announcement, your work based on + the Program is not required to print an announcement.) + +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Program, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. But when you +distribute the same sections as part of a whole which is a work based +on the Program, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote it. + +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Program. + +In addition, mere aggregation of another work not based on the Program +with the Program (or with a work based on the Program) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. You may copy and distribute the Program (or a work based on it, +under Section 2) in object code or executable form under the terms of +Sections 1 and 2 above provided that you also do one of the following: + + a) Accompany it with the complete corresponding machine-readable + source code, which must be distributed under the terms of Sections + 1 and 2 above on a medium customarily used for software interchange; or, + + b) Accompany it with a written offer, valid for at least three + years, to give any third party, for a charge no more than your + cost of physically performing source distribution, a complete + machine-readable copy of the corresponding source code, to be + distributed under the terms of Sections 1 and 2 above on a medium + customarily used for software interchange; or, + + c) Accompany it with the information you received as to the offer + to distribute corresponding source code. (This alternative is + allowed only for noncommercial distribution and only if you + received the program in object code or executable form with such + an offer, in accord with Subsection b above.) + +The source code for a work means the preferred form of the work for +making modifications to it. For an executable work, complete source +code means all the source code for all modules it contains, plus any +associated interface definition files, plus the scripts used to +control compilation and installation of the executable. However, as a +special exception, the source code distributed need not include +anything that is normally distributed (in either source or binary +form) with the major components (compiler, kernel, and so on) of the +operating system on which the executable runs, unless that component +itself accompanies the executable. + +If distribution of executable or object code is made by offering +access to copy from a designated place, then offering equivalent +access to copy the source code from the same place counts as +distribution of the source code, even though third parties are not +compelled to copy the source along with the object code. + + 4. You may not copy, modify, sublicense, or distribute the Program +except as expressly provided under this License. Any attempt +otherwise to copy, modify, sublicense or distribute the Program is +void, and will automatically terminate your rights under this License. +However, parties who have received copies, or rights, from you under +this License will not have their licenses terminated so long as such +parties remain in full compliance. + + 5. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Program or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Program (or any work based on the +Program), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Program or works based on it. + + 6. Each time you redistribute the Program (or any work based on the +Program), the recipient automatically receives a license from the +original licensor to copy, distribute or modify the Program subject to +these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. +You are not responsible for enforcing compliance by third parties to +this License. + + 7. If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Program at all. For example, if a patent +license would not permit royalty-free redistribution of the Program by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Program. + +If any portion of this section is held invalid or unenforceable under +any particular circumstance, the balance of the section is intended to +apply and the section as a whole is intended to apply in other +circumstances. + +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system, which is +implemented by public license practices. Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 8. If the distribution and/or use of the Program is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Program under this License +may add an explicit geographical distribution limitation excluding +those countries, so that distribution is permitted only in or among +countries not thus excluded. In such case, this License incorporates +the limitation as if written in the body of this License. + + 9. The Free Software Foundation may publish revised and/or new versions +of the General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + +Each version is given a distinguishing version number. If the Program +specifies a version number of this License which applies to it and "any +later version", you have the option of following the terms and conditions +either of that version or of any later version published by the Free +Software Foundation. If the Program does not specify a version number of +this License, you may choose any version ever published by the Free Software +Foundation. + + 10. If you wish to incorporate parts of the Program into other free +programs whose distribution conditions are different, write to the author +to ask for permission. For software which is copyrighted by the Free +Software Foundation, write to the Free Software Foundation; we sometimes +make exceptions for this. Our decision will be guided by the two goals +of preserving the free status of all derivatives of our free software and +of promoting the sharing and reuse of software generally. + + NO WARRANTY + + 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY +FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN +OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES +PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED +OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS +TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE +PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, +REPAIR OR CORRECTION. + + 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR +REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, +INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING +OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED +TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY +YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER +PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE +POSSIBILITY OF SUCH DAMAGES. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +convey the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. + + + Copyright (C) 19yy + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + + +Also add information on how to contact you by electronic and paper mail. + +If the program is interactive, make it output a short notice like this +when it starts in an interactive mode: + + Gnomovision version 69, Copyright (C) 19yy name of author + Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, the commands you use may +be called something other than `show w' and `show c'; they could even be +mouse-clicks or menu items--whatever suits your program. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the program, if +necessary. Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the program + `Gnomovision' (which makes passes at compilers) written by James Hacker. + + , 1 April 1989 + Ty Coon, President of Vice + +This General Public License does not permit incorporating your program into +proprietary programs. If your program is a subroutine library, you may +consider it more useful to permit linking proprietary applications with the +library. If this is what you want to do, use the GNU Library General +Public License instead of this License. diff --git a/ChangeLog b/ChangeLog new file mode 100644 index 0000000..fe6328e --- /dev/null +++ b/ChangeLog @@ -0,0 +1,529 @@ +2004-06-15 David A. Wheeler + * Released version 1.26. + * NOTE: Due to an error on my part, + the tar file for version 1.25 was for a short period + (after 2004-06-05) actually version 1.26, + incorrectly labelled as 1.25. + My sincere apologies!! Please upgrade to 1.26, since that + way you'll be SURE to get the right version. + +2004-06-04 David A. Wheeler + * Reviewed and modified Jared's code somewhat, and added + support for _TEXT() as well as _T(). + See http://www.rpi.edu/~pudeyo/articles/unicode.html for more info + on Microsoft's approach to internationalization involving TCHAR. + * Wrote ChangeLog entries for Jared's code. + +2004-06-04 Jared Robinson (jarrob, at, symantec.com) + * Added more support for Microsoft's approach to internationalization. + Thus, it accepts _T() just like gettext(), and adds many more + functions: _getts(), vswprintf(), _stprintf(), _vstprintf(), + vwprintf(), vfwprintf(), _vtprintf(), _ftprintf(), _vftprintf(), + _sntprintf(), _vsntprintf(), _ftscanf(), _gettc(). + In this approach, TCHAR and various macros are typically used. + In particular, _T() of tchar.h converts character strings + to long character strings, if _UNICODE is defined + (this makes TCHAR a long 16-bit character). Thus, T() is: + #ifdef _UNICODE + #define _T(x) L ## x + #else + #define _T(x) x + #endif + +2004-06-02 David A. Wheeler + * Added two new rules for GLib functions, + "g_get_home_dir" and "g_get_tmp_dir", per a suggestion by + Steve Kemp, lead of the Debian Security Auditing Project. + This closes the wishlist item in Debian bug report #250432 + (see http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=250432). + Contributors - please email wishlist items to me; + I can't monitor every distribution's local bug tracking system. + PLEASE tell upstream developers when there's a bug/wishlist + item, we can't fix it if we don't know. + * Added curl_getenv(). Kemp's suggestion reminded me to search + for other getenv()-like functions, and that one popped up. + * Added several rules for input functions (for -I) - + recv, recvfrom, recvmsg, fread, and readv. + * Tightened the false positive test slightly; if a name is + followed by = or - or + it's unlikely to be a function call, + so it'll be quietly discarded. + * Modified the summary report format slightly. + * Modified the getpass text to remove an extraneous character, + thanks to a bug report from Joerg Beyer (job, at, webde-ag.de) + * Modified installation instructions to clarify how to set + INSTALL_DIR at run-time so it installs elsewhere. + It uses the standard GNU conventions, but not everyone + knows about them. By default, it installs in /usr/local. + Just use normal make overrides to change that, e.g., + make INSTALL_DIR=/usr INSTALL_DIR_MAN=/usr/share/man install + I do NOT use the ?= macro-setting commands in the makefile, + because that's not standard (e.g., it's not in SUSv3), so + while that would work in GNU make, it wouldn't work in others. + +2004-05-31 David A. Wheeler + * Released version 1.25. + + +2004-05-30 David A. Wheeler + * Added more rules for finding problems by examining the + Red Hat Linux 9 documentation (the man3 man pages), + looking for phrases like "do not use", "security", and "obsolete". + Thus, added rules for + cuserid, getlogin, getpass, mkstemp, getpw, memalign, as + well as the obsolete functions gsignal, ssignal, ulimit, usleep. + * Modified text for strncat to clarify it. + My thanks to Christian Biere, christianbiere, at, gmx.de, for + reporting the problem. + * Added lengthy text to the manual to explain exactly how to use + flawfinder with vim and emacs. This should also help + integrate flawfinder into other text editors/IDEs. + * Fixed error in --columns format, so that the output is simply + "filename:linenumber:columnnumber" when --columns (-C) is used. + * Eliminated "Number of" phrase in the footer report + (it was redundant anyway) + * Added more statistical information to the footer report. + * Changed makefile to simplify running tests. + * Tests now autogenerate html and txt versions. + * Added shortcut single-letter commands (-D for --dataonly, + -Q for --quiet, -C for --columns), so that invoking from + editors is easier. + * Tries to autoremove some false positives. In particular, a function + name followed immediately by "=" (ignoring whitespace) + is automatically considered to be a variable and NOT a function, + and thus doesn't register as a hit. There are exotic cases + where this won't be correct, but they're pretty unlikely in + real code. + * Added a "--falsepositive" (-F) option, which tries to remove + many more likely false positives. The current heuristic is: + if -F is enabled, any function name must be + followed by "(" (ignoring whitespace) to be considered a + possible hit; otherwise, it's thrown away. + Thus, if you often use variable names that are + also the names of risky functions, like "access", you + might consider using this option. Note that flawfinder + uses simple lexical analysis; eliminating many more false positives + would require deeper code analysis + (to examine type information, buffer size declarations, etc.). + This option also disables reporting of static character + buffer arrays. + This -F option and the autoremoving of false positives above is + in response to a problem report from + Mike Ruscher (Mike.Ruscher, at, cse-cst.gc.ca), + though the approach and code is my own. This may not completely + solve Mr. Ruscher's problem, but it's a start. + * Documented that flawfinder output can be misunderstood if + there are source filenames whose names contain newline, linefeed, or + colon. Source filenames shouldn't have such characters anyway; + while flawfinder can handle them, many other tools can't. + * Modified the documentation to make it clear in the synopsis + which one-letter flags are short for which long names. + * Modified "make install" script slightly so that it creates + directories that don't yet exist when installing. + My thanks to Joerg Beyer (job, at webde-ag.de) for reporting + the problem and suggesting a solution. + This solution requires that "mkdir" support the "-p" option, + which shouldn't be a problem for nearly all users. + +2003-10-29 David A. Wheeler + * Released version 1.24. + * Fixed an incredibly obscure parsing error that caused some + false positives. If a constant C string, after the closing + double-quote, is followed by a \ and newline (instead of a comma), + the string might not be recognized as a constant string + (thus triggering warnings about non-constant values in some cases). + This kind of formatting is quite ugly and rare. + My thanks to Sascha Nitsch (sascha, at spsn.ath.cx) for pointing + this bug out and giving me a test case to work with. + * Added a warning for readlink. The implementation and warning + are mine, but the idea of warning about readlink came from + Stefan Kost (kost, at imn.htwk-leipzig.de). Thanks!! + +2003-09-27 David A. Wheeler + * Released version 1.23. Minor bugfixes. + +2003-09-27 David A. Wheeler + * Fixed subtle bug - in some circumstances single character constants + wouldn't be parsed correctly. My thanks to Scott Renfro + (scottdonotspam, at, renfro.org) for notifying me about this bug. + Scott Renfro also sent me a patch; I didn't use it + (the patch didn't handle other cases), but I'm grateful since it + illustrated the problem. + * Fixed documentation bug in man page. + The option "--minlevel=X" must be preceded by two dashes, + as are all GNU-style long options. The man page accidentally only + had one dash in the summary (it was correct elsewhere); it now + correctly shows both dashes. + * Modified man page to list filename extensions that are + interpreted as C/C++. + * Removed index.html from distribution - it's really only for the + website. + +2003-03-08 David A. Wheeler + * Released version 1.22. Output format slightly changed (colon added), + so that it's compatible with tools that expect compiler warnings + in the typical format "filename:line-number: warning". + To get the fully expected format (all in one line), use "-S". + Also, improved RPM packaging. + +2003-03-08 David A. Wheeler + * Changed makefile to be consistent with new RPM packaging approach. + * Changed makefile: now for testing, will automatically uninstall + old sloccount when creating rpm. Also (for me), make my_install + works (well, it helps me anyway). + +2003-02-01 Jose Pedro Oliveira + * Improved RPM packaging. + +2003-09-22 Jukka A. Ukkonen + * Recommended an extra colon in the output format, so that the + output format would like like typical compiler output (and thus + more compatible with existing tools that report warnings). + +2002-09-07 David A. Wheeler + * Released version 1.21, with the following changes: + * Improved the default output so it creates multiple formatted lines + instead of single very long lines for each hit. + Use the new "--singleline" (-S) option to get the original + "long line" format. + * Removed duplicate "getpass" entry in the ruleset; + this didn't hurt anything, but was unnecessary. + Thanks to the user who gave me that feedback, wish I'd kept your + email address so I could credit you properly :-). + * Added a short tutorial to man page. + * Fixed initial upper/lower case on many entries in the ruleset. + * Allow "--input" as a synonym for "--inputs". + +2002-07-07 David A. Wheeler + * Released version 1.20, with many more changes: + * Entries have been added to the database to detect file openings and + static character array definitions. + * The HTML format has been significantly improved. + * Joerg Beyer provided several nice improvements to flawfinder, + including a timing report. + * Now Flawfinder by default skips symbolic links, + and always skips special files, to counter attackers who + insert malicious files in their source code directories. + * The documentation has been improved in various ways. + +2002-07-05 David A. Wheeler + * Completely rewrote the functions handling opening the + files/directories named on the command line and when + walking down the directory tree. This was in part + to handle a new security requirement for source code web + hosting services, which may analyze code written by someone else + AND then send reports to someone else who doesn't have the + same rights to view files as the analysis program. + It's the last part that's different - the attacker may control + the code being analyzed and insert non-regular files or + symbolic links to "bad" files like /etc/passwd (to expose its + contents) or /dev/zero (to stall analysis). These are + annoying but not really a problem when the analyst is running on + his OWN machine. + To deal with this, now flawfinder NEVER opens a file type that isn't + a file or directory, and it skips symbolic + links by default (though this can be changed), + no matter if they're listed at the top or inside + a directory descendent. This is actually reasonable behavior + for everyone, since others may be analyzing programs + that they don't trust either. + * Added open() and fopen() as entries, now it has 127 entries + in the database. Modified test code to test it. + * Warning messages about skipping symlinks and + files that aren't regular files are now controlled by --quiet + instead of --dataonly; since --quiet controls printing + status messages this seems more reasonable. + * Changed the format of the HTML output - now it creates a list. + The ending is now in regular type, instead of
...
. + This seemed too look nicer. + * Reworked Beyer's patch that prints speed somewhat, e.g., to print + floating point time (on small programs or fast machines + the time would be reported as "0") and to avoid + divide-by-zero on systems where time really is reported + as zero. + * Added "--omittime", so that the regression test + results won't vary depending on the time the analysis takes. + * Fixed minor bug: now the filename "-" works to mean + standard input. This is rarely used, since usually files + are analyzed instead. + * Modified documentation to make clear that in many circumstances + you need to copy the source code to a separate area. + I removed the reference to "--nolink", since this is now + the default. + * Modified makefile to generate correct-results.html and + correct-results.txt, so that (1) there will be a standard + to compare with and (2) the web page has a demo. + +2002-07-05 Joerg Beyer + * Tiny patch to report the number of lines analyzed and + the analysis speed in lines/second. + +2002-07-04 David A. Wheeler + * Changed Joerg Beyer's patch that gives a nicer error + message if an invalid option flag is given. Now the patch + also works in Python 1.5. This involved using getopt.error + instead of getopt.GetoptError. + * Added a comment explicitly documenting that flawfinder + is written to run under both Python 1.5 and Python 2. + Lots of systems only include Python 1.5, or use Python 1.5 + as the default Python (e.g., Red Hat 7.2). + Someday that won't be a concern, but there's no reason it + can't easily port between the two for a while. + * Ran PyChecker and modified code to eliminate the errors it reports. + +2002-07-03 David A. Wheeler + * Changed the default to IGNORE symbolic links, and added the + --allowlink option to use symbolic links. This is a safer default, + and few people will really want to follow symbolic links anyway. + * Added option --dataonly to suppress headers and footers; + use this along with --quiet to get "just the facts" + (e.g., when processing the output with other tools). + This was inspired by a comment from A.T. Hofkamp. + +2002-07-03 Joerg Beyer + * Various small patches - thanks!! There were as follows: + * If you call flawfinder without input, + state that there was no input, not state that there's no hit. + * If interrupted with Control-C, flawfinder now prints cleanly + that it was interrupted. + * Print a nicer error message if an invalid option flag + is given. + * Just for completeness' sake, I'm including two of the patches: + --- flawfinder_orig Wed Jul 3 09:56:34 2002 + +++ flawfinder Wed Jul 3 10:25:49 2002 + @@ -1216,10 +1216,15 @@ + if loadhitlist: + f = open(loadhitlist) + hitlist = pickle.load(f) + else: + - for f in sys.argv[1:]: + + files = sys.argv[1:] + + if not files: + + print "*** no input files" + + return None + + for f in files: + process_dir_or_file(f) + + return 1 + + def show_final_results(): + global hitlist + count = 0 + @@ -1275,11 +1280,14 @@ + def flawfind(): + process_options() + display_header() + initialize_ruleset() + - process_files() + - show_final_results() + - save_if_desired() + + if process_files(): + + show_final_results() + + save_if_desired() + + + Detect control-C: + + --- flawfinder_orig Wed Jul 3 09:56:34 2002 + +++ flawfinder Wed Jul 3 09:58:37 2002 + @@ -1281,5 +1281,8 @@ + save_if_desired() + + if __name__ == '__main__': + - flawfind() + + try: + + flawfind() + + except KeyboardInterrupt: + + print "*** Flawfinder interrupted" + + --- flawfinder_orig Wed Jul 3 09:56:34 2002 + +++ flawfinder Wed Jul 3 09:58:37 2002 + @@ -1280,6 +1280,9 @@ + show_final_results() + save_if_desired() + + if __name__ == '__main__': + - flawfind() + + try: + + flawfind() + + except KeyboardInterrupt: + + print "*** Flawfinder interrupted" + + +2002-07-02 David A. Wheeler + * Added detection of static arrays of char, wchar_t, and TCHAR. + * Fixed typo in makefile uninstall script. My thanks to + Andrew Dalgleish for pointing this out. + * Modified installation to be friendlier to Cygwin. My thanks to + Andrew Dalgleish for pointing this out, too. + One step involved creating PYTHONEXT in the makefile + and documenting it, which was no problem. + A more surprising problem was that the INSTALL file needed to + be renamed to "INSTALL.txt", because otherwise "make install" + thinks that everything is already installed. + This is a nasty problem caused by Windows' type insensitivity + conflicting with normal Unix standards... this should really + be noted somewhere in various standard documents!! + I eventually added a ".PHONY:" target to the makefile, + which also solves the problem when using GNU make. + * Fixed ChangeLog - the 2002 dates were accidentally 2001. + +2002-07-02 David A. Wheeler + * Changed correct-results so that the version numbers are right. + * Created "make test-is-correct" which moves the test results + into the "correct-results" file. + +2002-07-02 David A. Wheeler + * Released version 1.01. + * Bugfix: Programs with getopt() or getopt_long() could trigger + a problem with flawfinder itself. Now fixed. + * Added the --nolink option, and a detailed description in the + man page. Basically, this foils attacks where malicious + programmers insert into their source tree symbolic links + to files like /etc/passwd or /dev/zero. + You still need to copy source code files into a separate area + if you are worried about malicious programmers; see the + new man page discussion about this. + +2002-07-01 David A. Wheeler + * Released version 1.00, a major step forward. + * I have significantly enlarged the database, from 55 rules + to 122 rules. Making the database this large is such a + major improvement in its usefulness that I've bumped the + version number up to 1.00. A number are from my book, + while others are suggested by "Writing Secure Code" by + Howard and LeBlanc (for the Windows-specific issues). + * Added HTML generation support. + * Significantly enlarged the test suite. + +2002-5-6 David A. Wheeler + * Released version 0.22, a very minor improvement. + * Modified the report about %s in scanf when a limit for %s + was provided; some found the error report very + confusing. My thanks to Agustin.Lopez, who asked a question + that led me to this understanding. + +2001-12-18 David A. Wheeler + * Released version 0.21. + * Fixed an error in the database entry for syslog(3), which + would cause incorrect hits. This resolves the Debian bug + "Bug#124009: flawfinder: wrong reports of format + fulnerabilities for syslog". + * Added simple "INSTALL" file. + * Fixed documentation, documenting --version and fixing a + format problem with "--neverignore". + * I accidentally wrote over version 0.20 with version 0.21's + contents. Sigh. + +2001-12-11 David A. Wheeler + * Released version 0.20. + * Added --version, which prints JUST the version number without + actually analyzing any programs. + +2001-11-08 David A. Wheeler + * Fixed MANIFEST.in to include "flawfinder.1*"; that way the + compressed man page is included when using MANIFEST.in. + Thanks to Jon Nelson for noting this. + The effect of this is quite tiny - + my tar file & rpm files already included the compressed + man page, so this error affects very few people. + Note also that this just meant that only the uncompressed + man page was in the MANIFEST, so I don't expect that this + error had any user-visible effects other than a few more K of man + page space (and with multi-Gigabyte drives, that's hard to notice). + +2001-11-04 David A. Wheeler + * Released version 0.19 + * Fixed a minor bug - flawfinder didn't realize that multiline strings + passed to gettext() are still constant strings. + My thanks to "Arthur", who reported this bug, and + Adam Lazur (Debian) who passed it on to me. + This closes Debian Bug#118025. + * Minor change - precomputed internationalization pattern for + a minor performance improvement. + * Output a reminder that not all hits are actually security + vulnerabilities, as well as that there may be other vulnerabilities. + The documentation mentioned this too, but including that in + the output of the program makes it clearer (apparantly some + expect flawfinder to perform amazing magic far beyond the + possible). I think ALL programs like this should include this + reminder; otherwise sane software developers somehow expect + programs like this to work miracles, instead of simply reporting + likely spots based on heuristics. + +2001-11-03 David A. Wheeler + * Added a "help" option and support for DistUtils, as well as + modification of the RPM spec file so it can be built by non-root. + My thanks to Jon Nelson for the patches to do this. + * Added "syslog" to the vulnerability database. + My thanks to Dave Aitel for this contribution. + * Generate and install compressed man page, rather than uncompressed. + My thanks to Marius Tomaschewski for this suggestion. + +2001-10-29 David A. Wheeler + * Released version 0.17. + * Created an RPM package, to simplify installation. + * Accepts RATS' "ignore" directive, as well as ITS4's, for + compatibility's sake with RATS. + * Trivial change: shortened processing status phrase to + "Processing" so long filenames are more likely to fit on one line. + * Modified the man page, in the hopes that the new one is even + easier to understand. + +2001-10-28 David A. Wheeler + * Released version 0.16. + * Added support for directories. If a directory (instead of a + file) is given on the command line as something to examine, + C/C++ files in that directory and its subdirectories (recursively) + are examined. This should make it easy to analyze entire projects, + and to make it easy to integrate flawfinder into project websites. + * Added to the vulnerability database: randomizing functions & getenv. + * Reports the number of hits at the end. + * Minor cleanup of text output. + * Print "processing" status every time a file is opened; this is + flushed, so that monitoring the status with "less" works well. + * Added the "--quiet" option, so that status information can be + suppressed. + +2001-06-06 David A. Wheeler + * Added support for file globbing on Windows/NT/DOS + (it's not needed for Cygwin - it's only needed when + run natively). File globbing characters are + correctly ignored in Unix-like ("posix") systems, since + the Unix shell does this for us. + +2001-05-29 David A. Wheeler + * Fixed manual page to close the "/*" comment with "*/". + +2001-05-29 David A. Wheeler + * Fixed a bug in directive handling, now directives work properly. + I only noticed this AFTER release of 0.14, sigh. + * Fixed the ChangeLog, to note the addition of --neverignore. + * Released version 0.15. + +2001-05-29 David A. Wheeler + * Fixed a minor problem in string handling; a string containing + \\ followed immediately by the double-quote mark (end of the string) + wasn't correctly handled. Now it is. + * Added information in the documentation describing how to ignore + hits on a specific line (a comment directive). Flawfinder has + always had this ability (since 0.12), but now it's documented. + Before, you had to read the test file test.c or the actual + flawfinder code to learn about this ability. + * Added the "--neverignore" / "-n" option. + * Having a number of conversations with John Viega comparing + RATS and flawfinder, with the goal of finding a way to + coordinate and have a "best of breed" scanner. This hasn't + produced a concrete result, but Viega will soon post a comparison + paper that I've had input on. + * Released version 0.14. + +2001-05-25 David A. Wheeler + * Fixed a minor error in that parameter parser; previously it + might have trouble with embedded preprocessor commands in + the middle of parameter lists. + * Added this ChangeLog. + * Released version 0.13. + +2001-05-21 David A. Wheeler + * Initial release of flawfinder version 0.12. + + diff --git a/INSTALL.txt b/INSTALL.txt new file mode 100644 index 0000000..991e7c7 --- /dev/null +++ b/INSTALL.txt @@ -0,0 +1,65 @@ +To install flawfinder: + +If you use an RPM-based system (e.g., Red Hat) or deb-based system +(e.g., Debian), use their respective RPM or debian installation program +and just install it; then ignore the rest of these instructions. +For a ports-based system where you have a current port, just use that. + +Otherwise, you'll need to install from the tarball. +So, here's how to do that. + +* Install the "tarball" and uncompress it. + GNU-based systems can run "tar xvzf flawfinder*.tar.gz" to do so, + then "cd" into the directory created. If that doesn't work + (e.g., you have an old tar program), use: + gunzip flawfinder*.tar.gz + tar xvf flawfinder*.tar + cd flawfinder-* + +* Decide where you want to put it. Flawfinder normally installs + in /usr/local, with the program in /usr/local/bin and the manual in + /usr/local/man, per GNU conventions. You can override this + using the normal GNU conventions when installing (with "make install") + by setting INSTALL_DIR (defaults to /usr/local), + INSTALL_DIR_BIN for the program location (defaults to INSTALL_DIR/bin), and + INSTALL_DIR_MAN for the manual location (defaults to INSTALL_DIR/man). + +* If you're using Cygwin on Windows, you can install it using "make install" + but you need to tell the makefile to use the .py extension + whenever you use make. This will be another make install override. + If you'll just install it, do this: + + make PYTHONEXT=.py install + + If you don't want to pass the "PYTHONEXT" extension each time, + you can change the file "makefile" to remember this. Just change + the line beginning with "PYTHONEXT=" so that it reads as follows: + PYTHONEXT=.py + +* Now install it, giving whatever overrides you need. + In most cases, you'll need to be root, so run this first: + su + + Then give the "make install" command appropriate for your system. + For an all-default installation, which is what you need for most cases: + make install + + (you need to be root; "make uninstall" reverses it). + + To install in /usr (the program in /usr/bin, the manual in /usr/man): + make INSTALL_DIR=/usr install + + To put the binaries in /usr/bin, and the manuals in /usr/share/man + (common for Red Hat Linux), do: + make INSTALL_DIR=/usr INSTALL_DIR_MAN=/usr/share/man install + + +* Windows systems should be able to run this on the command line (cmd.exe) + directly, but I haven't tried that. + +* You can also simply run the program in the directory you've unpacked it + into. It's a simple Python program, just type into a command line: + + ./flawfinder files_or_directory + + diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 0000000..c871528 --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1,14 @@ +include COPYING +include README +include announcement +include ChangeLog +include flawfinder.1* +include flawfinder.pdf +include flawfinder.ps + +include flawfinder +include makefile +include setup.cfg +include setup.py +include test.c +include test_result diff --git a/README b/README new file mode 100644 index 0000000..360c30a --- /dev/null +++ b/README @@ -0,0 +1,7 @@ +This is "flawfinder" by David A. Wheeler, . +It's a simple Python program for scanning source code for security problems. + +For more information, see: + http://www.dwheeler.com/flawfinder + +See INSTALL for installation instructions. diff --git a/announcement b/announcement new file mode 100644 index 0000000..2210ea8 --- /dev/null +++ b/announcement @@ -0,0 +1,29 @@ + +I've just released "flawfinder", a program that can scan source code +and identify out potential security flaws, ranking them by likely severity. +Unlike ITS4, flawfinder is completely open source / free software +(it's released under the GPL license). + +Flawfinder will miss some security problems, and point out issues that aren't +really security problems, but nevertheless I think it can help track +down security problems in code so that the code can be fixed. + +You can download flawfinder from: + http://www.dwheeler.com/flawfinder + +Flawfinder is in its very early stages - I'm labelling it version "0.12". +It works reliably, but its ruleset is currently small and rudimentary. +It can already find some security problems now, but expanding its ruleset +will give it much more power. Also, it currently can only examine C/C++ code. + +After I wrote flawfinder - and just before I released it - I found out that +Secure Software Solutions was also writing a program (RATS) to perform this +same task, also to be released under the GPL. We agreed to release our +programs simultaneously, and to mention each other's programs in our +announcements. Now that we've released our programs, we plan to coordinate +so that there will be a single open source / free software +source code scanner that will be a ``best of breed.'' + +--- David A. Wheeler + dwheeler@dwheeler.com + diff --git a/correct-results.html b/correct-results.html new file mode 100644 index 0000000..2535e58 --- /dev/null +++ b/correct-results.html @@ -0,0 +1,270 @@ + + + + +Flawfinder Results + + + + +

Flawfinder Results

+Here are the security scan results from +Flawfinder version 1.25, +(C) 2001-2004 David A. Wheeler. +Number of dangerous functions in C/C++ ruleset: 137 +

+Examining test.c
+Examining test2.c
+

    +
  • test.c:32: [5] (buffer) gets: + Does not check for buffer overflows. Use fgets() instead. +
    + gets(f);
    +
    +
  • test.c:56: [5] (buffer) strncat: + Easily used incorrectly (e.g., incorrectly computing the correct + maximum size to add). Consider strlcat or automatically resizing strings. + Risk is high; the length parameter appears to be a constant, instead of + computing the number of characters left. +
    +  strncat(d,s,sizeof(d)); /* Misuse - this should be flagged as riskier. */
    +
    +
  • test.c:57: [5] (buffer) _tcsncat: + Easily used incorrectly (e.g., incorrectly computing the correct + maximum size to add). Consider strlcat or automatically resizing strings. + Risk is high; the length parameter appears to be a constant, instead of + computing the number of characters left. +
    +  _tcsncat(d,s,sizeof(d)); /* Misuse - flag as riskier */
    +
    +
  • test.c:60: [5] (buffer) MultiByteToWideChar: + Requires maximum length in CHARACTERS, not bytes. Risk is high, it + appears that the size is given as bytes, but the function requires size as + characters. +
    +  MultiByteToWideChar(CP_ACP,0,szName,-1,wszUserName,sizeof(wszUserName));
    +
    +
  • test.c:62: [5] (buffer) MultiByteToWideChar: + Requires maximum length in CHARACTERS, not bytes. Risk is high, it + appears that the size is given as bytes, but the function requires size as + characters. +
    +  MultiByteToWideChar(CP_ACP,0,szName,-1,wszUserName,sizeof wszUserName);
    +
    +
  • test.c:73: [5] (misc) SetSecurityDescriptorDacl: + Never create NULL ACLs; an attacker can set it to Everyone (Deny All + Access), which would even forbid administrator access. +
    +  SetSecurityDescriptorDacl(&sd,TRUE,NULL,FALSE);
    +
    +
  • test.c:73: [5] (misc) SetSecurityDescriptorDacl: + Never create NULL ACLs; an attacker can set it to Everyone (Deny All + Access), which would even forbid administrator access. +
    +  SetSecurityDescriptorDacl(&sd,TRUE,NULL,FALSE);
    +
    +
  • test.c:17: [4] (buffer) strcpy: + Does not check for buffer overflows when copying to destination. + Consider using strncpy or strlcpy (warning, strncpy is easily misused). +
    + strcpy(b, a);
    +
    +
  • test.c:20: [4] (buffer) sprintf: + Does not check for buffer overflows. Use snprintf or vsnprintf. +
    + sprintf(s, "hello %s", bug);
    +
    +
  • test.c:21: [4] (buffer) sprintf: + Does not check for buffer overflows. Use snprintf or vsnprintf. +
    + sprintf(s, gettext("hello %s"), bug);
    +
    +
  • test.c:22: [4] (format) sprintf: + Potential format string problem. Make format string constant. +
    + sprintf(s, unknown, bug);
    +
    +
  • test.c:23: [4] (format) printf: + If format strings can be influenced by an attacker, they can be + exploited. Use a constant for the format specification. +
    + printf(bf, x);
    +
    +
  • test.c:25: [4] (buffer) scanf: + The scanf() family's %s operation, without a limit specification, + permits buffer overflows. Specify a limit to %s, or use a different input + function. +
    + scanf("%s", s);
    +
    +
  • test.c:27: [4] (buffer) scanf: + The scanf() family's %s operation, without a limit specification, + permits buffer overflows. Specify a limit to %s, or use a different input + function. +
    + scanf("%s", s);
    +
    +
  • test.c:38: [4] (format) syslog: + If syslog's format strings can be influenced by an attacker, they can + be exploited. Use a constant format string for syslog. +
    + syslog(LOG_ERR, attacker_string);
    +
    +
  • test.c:49: [4] (buffer) _mbscpy: + Does not check for buffer overflows when copying to destination. + Consider using a function version that stops copying at the end of the + buffer. +
    +  _mbscpy(d,s); /* like strcpy, this doesn't check for buffer overflow */
    +
    +
  • test.c:52: [4] (buffer) lstrcat: + Does not check for buffer overflows when concatenating to destination. +
    +  lstrcat(d,s);
    +
    +
  • test.c:75: [3] (shell) CreateProcess: + This causes a new process to execute and is difficult to use safely. + Specify the application path in the first argument, NOT as part of the + second, or embedded spaces could allow an attacker to force a different + program to run. +
    +  CreateProcess(NULL, "C:\\Program Files\\GoodGuy\\GoodGuy.exe -x", "");
    +
    +
  • test.c:75: [3] (shell) CreateProcess: + This causes a new process to execute and is difficult to use safely. + Specify the application path in the first argument, NOT as part of the + second, or embedded spaces could allow an attacker to force a different + program to run. +
    +  CreateProcess(NULL, "C:\\Program Files\\GoodGuy\\GoodGuy.exe -x", "");
    +
    +
  • test.c:91: [3] (buffer) getopt_long: + Some older implementations do not protect against internal buffer + overflows . Check implementation on installation, or limit the size of all + string inputs. +
    +    while ((optc = getopt_long (argc, argv, "a",longopts, NULL )) != EOF) {
    +
    +
  • test.c:16: [2] (buffer) strcpy: + Does not check for buffer overflows when copying to destination. + Consider using strncpy or strlcpy (warning, strncpy is easily misused). Risk + is low because the source is a constant string. +
    + strcpy(a, gettext("Hello there")); // Did this work?
    +
    +
  • test.c:19: [2] (buffer) sprintf: + Does not check for buffer overflows. Use snprintf or vsnprintf. Risk + is low because the source has a constant maximum length. +
    + sprintf(s, "hello");
    +
    +
  • test.c:45: [2] (buffer) char: + Statically-sized arrays can be overflowed. Perform bounds checking, + use functions that limit length, or ensure that the size is larger than + the maximum possible length. +
    +  char d[20];
    +
    +
  • test.c:46: [2] (buffer) char: + Statically-sized arrays can be overflowed. Perform bounds checking, + use functions that limit length, or ensure that the size is larger than + the maximum possible length. +
    +  char s[20];
    +
    +
  • test.c:50: [2] (buffer) memcpy: + Does not check for buffer overflows when copying to destination. Make + sure destination can always hold the source data. +
    +  memcpy(d,s);
    +
    +
  • test.c:51: [2] (buffer) CopyMemory: + Does not check for buffer overflows when copying to destination. Make + sure destination can always hold the source data. +
    +  CopyMemory(d,s);
    +
    +
  • test.c:97: [2] (misc) fopen: + Check when opening files - can an attacker redirect it (via symlinks), + force the opening of special file type (e.g., device files), move + things around to create a race condition, control its ancestors, or change + its contents?. +
    +  f = fopen("/etc/passwd", "r"); 
    +
    +
  • test.c:15: [1] (buffer) strcpy: + Does not check for buffer overflows when copying to destination. + Consider using strncpy or strlcpy (warning, strncpy is easily misused). Risk + is low because the source is a constant character. +
    + strcpy(a, "\n"); // Did this work?
    +
    +
  • test.c:18: [1] (buffer) sprintf: + Does not check for buffer overflows. Use snprintf or vsnprintf. Risk + is low because the source is a constant character. +
    + sprintf(s, "\n");
    +
    +
  • test.c:26: [1] (buffer) scanf: + it's unclear if the %s limit in the format string is small enough. + Check that the limit is sufficiently small, or use a different input + function. +
    + scanf("%10s", s);
    +
    +
  • test.c:53: [1] (buffer) strncpy: + Easily used incorrectly; doesn't always \0-terminate or check for + invalid pointers. +
    +  strncpy(d,s);
    +
    +
  • test.c:54: [1] (buffer) _tcsncpy: + Easily used incorrectly; doesn't always \0-terminate or check for + invalid pointers. +
    +  _tcsncpy(d,s);
    +
    +
  • test.c:55: [1] (buffer) strncat: + Easily used incorrectly (e.g., incorrectly computing the correct + maximum size to add). Consider strlcat or automatically resizing strings. +
    +  strncat(d,s,10);
    +
    +
  • test.c:58: [1] (buffer) strlen: + Does not handle strings that are not \0-terminated (it could cause a + crash if unprotected). +
    +  n = strlen(d);
    +
    +
  • test.c:64: [1] (buffer) MultiByteToWideChar: + Requires maximum length in CHARACTERS, not bytes. Risk is very low, + the length appears to be in characters not bytes. +
    +  MultiByteToWideChar(CP_ACP,0,szName,-1,wszUserName,sizeof(wszUserName)/sizeof(wszUserName[0]));
    +
    +
  • test.c:66: [1] (buffer) MultiByteToWideChar: + Requires maximum length in CHARACTERS, not bytes. Risk is very low, + the length appears to be in characters not bytes. +
    +  MultiByteToWideChar(CP_ACP,0,szName,-1,wszUserName,sizeof wszUserName /sizeof(wszUserName[0]));
    +
    +
+

+Hits = 36 +
+Lines analyzed = 118 +
+Physical Source Lines of Code (SLOC) = 80 +
+Hits @ level = [0] 0 [1] 9 [2] 7 [3] 3 [4] 10 [5] 7
+Hits @ level+ = [0+] 36 [1+] 36 [2+] 27 [3+] 20 [4+] 17 [5+] 7
+Hits/KSLOC @ level+ = [0+] 450 [1+] 450 [2+] 338 [3+] 250 [4+] 213 [5+] 88
+Suppressed hits = 2 (use --neverignore to show them) +
+Minimum risk level = 1 +
+Not every hit is necessarily a security vulnerability. +
+There may be other security vulnerabilities; review your code! + + diff --git a/correct-results.txt b/correct-results.txt new file mode 100644 index 0000000..d199548 --- /dev/null +++ b/correct-results.txt @@ -0,0 +1,139 @@ +Flawfinder version 1.25, (C) 2001-2004 David A. Wheeler. +Number of dangerous functions in C/C++ ruleset: 137 +Examining test.c +Examining test2.c +test.c:32: [5] (buffer) gets: + Does not check for buffer overflows. Use fgets() instead. +test.c:56: [5] (buffer) strncat: + Easily used incorrectly (e.g., incorrectly computing the correct + maximum size to add). Consider strlcat or automatically resizing strings. + Risk is high; the length parameter appears to be a constant, instead of + computing the number of characters left. +test.c:57: [5] (buffer) _tcsncat: + Easily used incorrectly (e.g., incorrectly computing the correct + maximum size to add). Consider strlcat or automatically resizing strings. + Risk is high; the length parameter appears to be a constant, instead of + computing the number of characters left. +test.c:60: [5] (buffer) MultiByteToWideChar: + Requires maximum length in CHARACTERS, not bytes. Risk is high, it + appears that the size is given as bytes, but the function requires size as + characters. +test.c:62: [5] (buffer) MultiByteToWideChar: + Requires maximum length in CHARACTERS, not bytes. Risk is high, it + appears that the size is given as bytes, but the function requires size as + characters. +test.c:73: [5] (misc) SetSecurityDescriptorDacl: + Never create NULL ACLs; an attacker can set it to Everyone (Deny All + Access), which would even forbid administrator access. +test.c:73: [5] (misc) SetSecurityDescriptorDacl: + Never create NULL ACLs; an attacker can set it to Everyone (Deny All + Access), which would even forbid administrator access. +test.c:17: [4] (buffer) strcpy: + Does not check for buffer overflows when copying to destination. + Consider using strncpy or strlcpy (warning, strncpy is easily misused). +test.c:20: [4] (buffer) sprintf: + Does not check for buffer overflows. Use snprintf or vsnprintf. +test.c:21: [4] (buffer) sprintf: + Does not check for buffer overflows. Use snprintf or vsnprintf. +test.c:22: [4] (format) sprintf: + Potential format string problem. Make format string constant. +test.c:23: [4] (format) printf: + If format strings can be influenced by an attacker, they can be + exploited. Use a constant for the format specification. +test.c:25: [4] (buffer) scanf: + The scanf() family's %s operation, without a limit specification, + permits buffer overflows. Specify a limit to %s, or use a different input + function. +test.c:27: [4] (buffer) scanf: + The scanf() family's %s operation, without a limit specification, + permits buffer overflows. Specify a limit to %s, or use a different input + function. +test.c:38: [4] (format) syslog: + If syslog's format strings can be influenced by an attacker, they can + be exploited. Use a constant format string for syslog. +test.c:49: [4] (buffer) _mbscpy: + Does not check for buffer overflows when copying to destination. + Consider using a function version that stops copying at the end of the + buffer. +test.c:52: [4] (buffer) lstrcat: + Does not check for buffer overflows when concatenating to destination. +test.c:75: [3] (shell) CreateProcess: + This causes a new process to execute and is difficult to use safely. + Specify the application path in the first argument, NOT as part of the + second, or embedded spaces could allow an attacker to force a different + program to run. +test.c:75: [3] (shell) CreateProcess: + This causes a new process to execute and is difficult to use safely. + Specify the application path in the first argument, NOT as part of the + second, or embedded spaces could allow an attacker to force a different + program to run. +test.c:91: [3] (buffer) getopt_long: + Some older implementations do not protect against internal buffer + overflows . Check implementation on installation, or limit the size of all + string inputs. +test.c:16: [2] (buffer) strcpy: + Does not check for buffer overflows when copying to destination. + Consider using strncpy or strlcpy (warning, strncpy is easily misused). Risk + is low because the source is a constant string. +test.c:19: [2] (buffer) sprintf: + Does not check for buffer overflows. Use snprintf or vsnprintf. Risk + is low because the source has a constant maximum length. +test.c:45: [2] (buffer) char: + Statically-sized arrays can be overflowed. Perform bounds checking, + use functions that limit length, or ensure that the size is larger than + the maximum possible length. +test.c:46: [2] (buffer) char: + Statically-sized arrays can be overflowed. Perform bounds checking, + use functions that limit length, or ensure that the size is larger than + the maximum possible length. +test.c:50: [2] (buffer) memcpy: + Does not check for buffer overflows when copying to destination. Make + sure destination can always hold the source data. +test.c:51: [2] (buffer) CopyMemory: + Does not check for buffer overflows when copying to destination. Make + sure destination can always hold the source data. +test.c:97: [2] (misc) fopen: + Check when opening files - can an attacker redirect it (via symlinks), + force the opening of special file type (e.g., device files), move + things around to create a race condition, control its ancestors, or change + its contents?. +test.c:15: [1] (buffer) strcpy: + Does not check for buffer overflows when copying to destination. + Consider using strncpy or strlcpy (warning, strncpy is easily misused). Risk + is low because the source is a constant character. +test.c:18: [1] (buffer) sprintf: + Does not check for buffer overflows. Use snprintf or vsnprintf. Risk + is low because the source is a constant character. +test.c:26: [1] (buffer) scanf: + it's unclear if the %s limit in the format string is small enough. + Check that the limit is sufficiently small, or use a different input + function. +test.c:53: [1] (buffer) strncpy: + Easily used incorrectly; doesn't always \0-terminate or check for + invalid pointers. +test.c:54: [1] (buffer) _tcsncpy: + Easily used incorrectly; doesn't always \0-terminate or check for + invalid pointers. +test.c:55: [1] (buffer) strncat: + Easily used incorrectly (e.g., incorrectly computing the correct + maximum size to add). Consider strlcat or automatically resizing strings. +test.c:58: [1] (buffer) strlen: + Does not handle strings that are not \0-terminated (it could cause a + crash if unprotected). +test.c:64: [1] (buffer) MultiByteToWideChar: + Requires maximum length in CHARACTERS, not bytes. Risk is very low, + the length appears to be in characters not bytes. +test.c:66: [1] (buffer) MultiByteToWideChar: + Requires maximum length in CHARACTERS, not bytes. Risk is very low, + the length appears to be in characters not bytes. + +Hits = 36 +Lines analyzed = 118 +Physical Source Lines of Code (SLOC) = 80 +Hits @ level = [0] 0 [1] 9 [2] 7 [3] 3 [4] 10 [5] 7 +Hits @ level+ = [0+] 36 [1+] 36 [2+] 27 [3+] 20 [4+] 17 [5+] 7 +Hits/KSLOC @ level+ = [0+] 450 [1+] 450 [2+] 338 [3+] 250 [4+] 213 [5+] 88 +Suppressed hits = 2 (use --neverignore to show them) +Minimum risk level = 1 +Not every hit is necessarily a security vulnerability. +There may be other security vulnerabilities; review your code! diff --git a/flaw-defect-report b/flaw-defect-report new file mode 100644 index 0000000..0152996 --- /dev/null +++ b/flaw-defect-report @@ -0,0 +1,114 @@ +From - Sun Nov 4 11:39:04 2001 +X-UIDL: 4bd5a7eeb0e24a21ff091e0d7f4cec01 +X-Mozilla-Status: 0001 +X-Mozilla-Status2: 00000000 +Return-Path: +Received: from cs.ida.org by fricka.csed.ida.org (SMI-8.6/SMI-SVR4) + id WAA06993; Fri, 2 Nov 2001 22:22:00 -0500 +Received: from mailhost.nl (webframe.nl [212.204.207.201]) + by cs.ida.org (Switch-2.2.0/Switch-2.2.0) with SMTP id fA33Lxp18254 + for ; Fri, 2 Nov 2001 22:21:59 -0500 (EST) +Received: from x-o.clustermonkey.org (postfix@x-o.clustermonkey.org [64.242.77.225]) + by mailhost.nl (8.9.3/8.9.3) with ESMTP id EAA12369 + for ; Sat, 3 Nov 2001 04:21:56 +0100 +Received: by x-o.clustermonkey.org (Postfix, from userid 1000) + id 6B96B61E92B; Fri, 2 Nov 2001 22:21:54 -0500 (EST) +Date: Fri, 2 Nov 2001 22:21:54 -0500 +From: Adam Lazur +To: "David A. Wheeler" +Subject: [arthur@tiefighter.et.tudelft.nl: Bug#118025: flawfinder does not detect multiline strings right] +Message-ID: <20011102222154.B24827@clustermonkey.org> +Mime-Version: 1.0 +Content-Type: multipart/mixed; boundary="rwEMma7ioTxnRzrJ" +Content-Disposition: inline +User-Agent: Mutt/1.3.23i +X-UIDL: 4bd5a7eeb0e24a21ff091e0d7f4cec01 + + +--rwEMma7ioTxnRzrJ +Content-Type: text/plain; charset=us-ascii +Content-Disposition: inline + +Attached is the first bug report from the Debian package of flawfinder. + +Replies can be sent to 118025@bugs.debian.org and they will be appended +to the bug's history and also sent to the bug submitter. The bug history +can be found at: http://bugs.debian.org/118025 + +-- +Adam Lazur, Cluster Monkey + +--rwEMma7ioTxnRzrJ +Content-Type: message/rfc822 +Content-Disposition: inline + +X-Envelope-From: debbugs@master.debian.org Fri Nov 2 10:09:37 2001 +Return-Path: +Delivered-To: laz@clustermonkey.org +Received: from master.debian.org (unknown [216.234.231.5]) + by x-o.clustermonkey.org (Postfix) with ESMTP id 981EF61E913 + for ; Fri, 2 Nov 2001 10:09:37 -0500 (EST) +Received: from debbugs by master.debian.org with local (Exim 3.12 1 (Debian)) + id 15zfqY-0007Zt-00; Fri, 02 Nov 2001 09:03:02 -0600 +Subject: Bug#118025: flawfinder does not detect multiline strings right +Reply-To: arthur@tiefighter.et.tudelft.nl, 118025@bugs.debian.org +Resent-From: arthur@tiefighter.et.tudelft.nl +Resent-To: debian-bugs-dist@lists.debian.org +Resent-Cc: Adam Lazur +Resent-Date: Fri, 02 Nov 2001 15:03:02 GMT +Resent-Message-ID: +X-Debian-PR-Message: report 118025 +X-Debian-PR-Package: flawfinder +X-Loop: owner@bugs.debian.org +Received: via spool by submit@bugs.debian.org id=B.100471287428113 + (code B ref -1); Fri, 02 Nov 2001 15:03:02 GMT +From: arthur@tiefighter.et.tudelft.nl +X-Authentication-Warning: ch.twi.tudelft.nl: arthur owned process doing -bs +Date: Fri, 2 Nov 2001 15:54:02 +0100 (CET) +X-Sender: arthur@ch.twi.tudelft.nl +To: submit@bugs.debian.org +Message-ID: +MIME-Version: 1.0 +Content-Type: TEXT/PLAIN; charset=US-ASCII +Delivered-To: submit@bugs.debian.org +Resent-Sender: Debian BTS + + +Package: flawfinder +Version: 0.17-1 +Severity: normal + + +Does strange things with respect to strings that are spread over multiple +lines. + +Sample code: + +1: static void a() +2: { +3: printf(_("a")); +4: printf(_("b" +5: "c")); +6: printf("a"); +7: printf("b" +8: "c"); +9: } + +Flawfinder output (partial): +/tmp/tst.c:4 [4] (format) printf: if format strings can be influenced by an attacker, they can be exploited. Use a constant for the format specification. + +One would expect flawfinder either to report lines 3 and 4 as possible +security riscs or lines 4 and 7. This is not expected behaviour. + +On a sindenote a disclaimer may be in order about the accuracy of the +results. All things flawfinder reported on my code were no security +threats. + +-- arthur - arthur@tiefighter.et.tudelft.nl - http://tiefighter.et.tudelft.nl/~arthur -- + + + +--rwEMma7ioTxnRzrJ-- + + + diff --git a/flawfinder b/flawfinder new file mode 100755 index 0000000..9db2e63 --- /dev/null +++ b/flawfinder @@ -0,0 +1,1664 @@ +#!/usr/bin/env python + +"""flawfinder: Find potential security flaws ("hits") in source code. + Usage: + flawfinder [options] [source_code_file]+ + + See the man page for a description of the options.""" + +version="1.26" + +# The default output is as follows: +# filename:line_number [risk_level] (type) function_name: message +# where "risk_level" goes from 0 to 5. 0=no risk, 5=maximum risk. +# The final output is sorted by risk level, most risky first. +# Optionally ":column_number" can be added after the line number. +# +# Currently this program can only analyze C/C++ code. +# Note: this code is designed to run under both Python 1.5 and 2. +# Thus, it avoids constructs not in Python 1.5 such as "+=" +# and "print >> stderr". + +# Copyright (C) 2001-2004 David A. Wheeler +# This is released under the General Public License (GPL): +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + + + +import sys, re, string, getopt +import pickle # To support load/save/diff of hitlist +import os, glob, operator # To support filename expansion on Windows +import os.path +import time +# import formatter + +# Program Options - these are the default values: +show_context = 0 +minimum_level = 1 +show_immediately = 0 +show_inputs = 0 # Only show inputs? +falsepositive = 0 # Work to remove false positives? +allowlink = 0 # Allow symbolic links? +num_links_skipped = 0 # Number of links skipped. +show_columns = 0 +never_ignore = 0 # If true, NEVER ignore problems, even if directed. +list_rules = 0 # If true, list the rules (helpful for debugging) +loadhitlist = None +savehitlist = None +diffhitlist = None +quiet = 0 +showheading = 1 # --dataonly turns this off +output_format = 0 # 0 = normal, 1 = html. +single_line = 0 # 1 = singleline (can 't be 0 if html) +omit_time = 0 # 1 = omit time-to-run (needed for testing) + +displayed_header = 0 # Have we displayed the header yet? +num_ignored_hits = 0 # Number of ignored hits (used if never_ignore==0) + +def error(message): + sys.stderr.write("Error: %s\n"% message) + + +# Support routines: find a pattern. +# To simplify the calling convention, several global variables are used +# and these support routines are defined, in an attempt to make the +# actual calls simpler and clearer. +# + +filename = "" # Source filename. +linenumber = 0 # Linenumber from original file. +ignoreline = -1 # Line number to ignore. +sumlines = 0 # Number of lines (total) examined. +sloc = 0 # Physical SLOC +starttime = time.time() # Used to determine analyzed lines/second. + + +line_beginning = re.compile( r'(?m)^' ) +blank_line = re.compile( r'(?m)^\s+$' ) + +def htmlize(s): + # Take s, and return legal (UTF-8) HTML. + s1 = string.replace(s,"&","&") + s2 = string.replace(s1,"<","<") + s3 = string.replace(s2,">",">") + return s3 + +def h(s): + # htmlize s if we're generating html, otherwise just return s. + if output_format: return htmlize(s) + else: return s + +def print_multi_line(text): + # Print text as multiple indented lines. + width = 72 + prefix = " " + starting_position = len(prefix) + 1 + printed_something = 0 # Have we printed on this line? + position = starting_position + nextword = "" + + print prefix, + for c in text: + if (c == " "): + print nextword, + position = position + 1 # account for space we just printed. + printed_something = 1 + nextword = "" + else: # NonSpace. + nextword = nextword + c + position = position + 1 + if position > width: # Whups, out of space + if (printed_something): # We've printed something out. + print # Done with this line, move to next. + print prefix, + position = starting_position + print nextword, # Print remainder (can be overlong if no spaces) + + +class Hit: + """ + Each instance of Hit is a warning of some kind in a source code file. + See the rulesets, which define the conditions for triggering a hit. + Hit is initialized with a tuple containing the following: + hook: function to call when function name found. + level: (default) warning level, 0-5. 0=no problem, 5=very risky. + warning: warning (text saying what's the problem) + suggestion: suggestion (text suggesting what to do instead) + category: One of "buffer" (buffer overflow), "race" (race condition), + "tmpfile" (temporary file creation), "format" (format string). + Use "" if you don't have a better category. + url: URL fragment reference. + other: A dictionary with other settings. + + Other settings usually set: + + name: function name + parameter: the function parameters (0th parameter null) + input: set to 1 if the function inputs from external sources. + start: start position (index) of the function name (in text) + end: end position of the function name (in text) + filename: name of file + line: line number in file + column: column in line in file + context_text: text surrounding hit""" + + # Set default values: + source_position = 2 # By default, the second parameter is the source. + format_position = 1 # By default, the first parameter is the format. + input = 0 # By default, this doesn't read input. + note = "" # No additional notes. + filename = "" # Empty string is filename. + extract_lookahead = 0 # Normally don't extract lookahead. + + def __init__(self, data): + hook, level, warning, suggestion, category, url, other = data + self.hook, self.level = hook, level + self.warning, self.suggestion = warning, suggestion + self.category, self.url = category, url + # These will be set later, but I set them here so that + # analysis tools like PyChecker will know about them. + self.column = 0 + self.line = 0 + self.name = "" + self.context_text = "" + for key in other.keys(): + setattr(self, key, other[key]) + + def __cmp__(self, other): + return (cmp(other.level, self.level) or # Highest risk first. + cmp(self.filename, other.filename) or + cmp(self.line, other.line) or + cmp(self.column, other.column) or + cmp(self.name, other.name)) + + def __getitem__(self, X): # Define this so this works: "%(line)" % hit + return getattr(self, X) + + def show(self): + if output_format: print "

  • ", + sys.stdout.write(h(self.filename)) + + if show_columns: print ":%(line)s:%(column)s:" % self, + else: print ":%(line)s:" % self, + + if output_format: print "", + # Extra space before risk level in text, makes it easier to find: + print " [%(level)s]" % self, + if output_format: print "", + print "(%(category)s)" % self, + if output_format: print "", + print h("%(name)s:" % self), + if single_line: + print h("%(warning)s." % self), + if self.suggestion: print h(self.suggestion)+".", + print h(self.note), + else: + main_text = h("%(warning)s. " % self) + if self.suggestion: main_text = main_text + h(self.suggestion) + ". " + main_text = main_text + h(self.note) + print + print_multi_line(main_text) + if output_format: print "", + print + if show_context: + if output_format: print "
    "
    +      print h(self.context_text)
    +      if output_format: print "
    " + + + +# The "hitlist" is the list of all hits (warnings) found so far. +# Use add_warning to add to it. + +hitlist = [] + +def add_warning(hit): + global hitlist, num_ignored_hits + if show_inputs and not hit.input: return + if hit.level >= minimum_level: + if linenumber == ignoreline: + num_ignored_hits = num_ignored_hits + 1 + else: + hitlist.append(hit) + if show_immediately: + hit.show() + +def internal_warn(message): + print h(message) + +# C Language Specific + +def extract_c_parameters(text, pos=0): + "Return a list of the given C function's parameters, starting at text[pos]" + # '(a,b)' produces ['', 'a', 'b'] + i = pos + # Skip whitespace and find the "("; if there isn't one, return []: + while i < len(text): + if text[i] == '(': break + elif text[i] in string.whitespace: i = i + 1 + else: return [] + else: # Never found a reasonable ending. + return [] + i = i + 1 + parameters = [""] # Insert 0th entry, so 1st parameter is parameter[1]. + currentstart = i + parenlevel = 1 + instring = 0 # 1=in double-quote, 2=in single-quote + incomment = 0 + while i < len(text): + c = text[i] + if instring: + if c == '"' and instring == 1: instring = 0 + elif c == "'" and instring == 2: instring = 0 + # if \, skip next character too. The C/C++ rules for + # \ are actually more complex, supporting \ooo octal and + # \xhh hexadecimal (which can be shortened), but we don't need to + # parse that deeply, we just need to know we'll stay in string mode: + elif c == '\\': i = i + 1 + elif incomment: + if c == '*' and text[i:i+2]=='*/': + incomment = 0 + i = i + 1 + else: + if c == '"': instring = 1 + elif c == "'": instring = 2 + elif c == '/' and text[i:i+2]=='/*': + incomment = 1 + i = i + 1 + elif c == '/' and text[i:i+2]=='//': + while i < len(text) and text[i] != "\n": + i = i + 1 + elif c == '\\' and text[i:i+2]=='\\"': i = i + 1 # Handle exposed '\"' + elif c == '(': parenlevel = parenlevel + 1 + elif c == ',' and (parenlevel == 1): + parameters.append(string.strip( + p_trailingbackslashes.sub('', text[currentstart:i]))) + currentstart = i + 1 + elif c == ')': + parenlevel = parenlevel - 1 + if parenlevel <= 0: + parameters.append(string.strip( + p_trailingbackslashes.sub('', text[currentstart:i]))) + # Re-enable these for debugging: + # print " EXTRACT_C_PARAMETERS: ", text[pos:pos+80] + # print " RESULTS: ", parameters + return parameters + elif c == ';': + internal_warn("Parsing failed to find end of parameter list; " + "semicolon terminated it in %s" % text[pos:pos+200]) + return parameters + i = i + 1 + internal_warn("Parsing failed to find end of parameter list in %s" % + text[pos:pos+200]) + + +# These patterns match gettext() and _() for internationalization. +# This is compiled here, to avoid constant recomputation. +# FIXME: assumes simple function call if it ends with ")", +# so will get confused by patterns like gettext("hi") + function("bye") +# In practice, this doesn't seem to be a problem; gettext() is usually +# wrapped around the entire parameter. +# The ?s makes it posible to match multi-line strings. +gettext_pattern = re.compile(r'(?s)^\s*' + 'gettext' + r'\s*\((.*)\)\s*$') +undersc_pattern = re.compile(r'(?s)^\s*' + '_(T(EXT)?)?' + r'\s*\((.*)\)\s*$') + +def strip_i18n(text): + "Strip any internationalization function calls surrounding 'text', " + "such as gettext() and _()." + match = gettext_pattern.search(text) + if match: return string.strip(match.group(1)) + match = undersc_pattern.search(text) + if match: return string.strip(match.group(3)) + return text + +p_trailingbackslashes = re.compile( r'(\s|\\(\n|\r))*$') + +p_c_singleton_string = re.compile( r'^\s*"([^\\]|\\[^0-6]|\\[0-6]+)?"\s*$') + +def c_singleton_string(text): + "Returns true if text is a C string with 0 or 1 character." + if p_c_singleton_string.search(text): return 1 + else: return 0 + +# This string defines a C constant. +p_c_constant_string = re.compile( r'^\s*"([^\\]|\\[^0-6]|\\[0-6]+)*"$') + +def c_constant_string(text): + "Returns true if text is a constant C string." + if p_c_constant_string.search(text): return 1 + else: return 0 + + +# Precompile patterns for speed. + + +def c_buffer(hit): + source_position = hit.source_position + if source_position <= len(hit.parameters)-1: + source=hit.parameters[source_position] + if c_singleton_string(source): + hit.level = 1 + hit.note = "Risk is low because the source is a constant character." + elif c_constant_string(strip_i18n(source)): + hit.level = max( hit.level - 2, 1) + hit.note = "Risk is low because the source is a constant string." + add_warning(hit) + + +p_dangerous_strncat = re.compile(r'^\s*sizeof\s*(\(\s*)?[A-Za-z_$0-9]+' + + r'\s*(\)\s*)?(-\s*1\s*)?$') +# This is a heuristic: constants in C are usually given in all upper case letters. +# Yes, this need not be true, but it's true often enough that it's worth +# using as a heuristic. strncat better not be passed a constant as the length! +p_looks_like_constant = re.compile(r'^\s*[A-Z][A-Z_$0-9]+\s*(-\s*1\s*)?$') + +def c_strncat(hit): + if len(hit.parameters) > 3: + # A common mistake is to think that when calling strncat(dest,src,len), that + # "len" means the ENTIRE length of the destination. This isn't true, it must + # be the length of the characters TO BE ADDED at most. Which is one reason that + # strlcat is better than strncat. We'll detect a common case of this error; + # if the length parameter is of the form "sizeof(dest)", we have this error. + # Actually, sizeof(dest) is okay if the dest's first character is always \0, + # but in that case the programmer should use strncpy, NOT strncat. + # The following heuristic will certainly miss some dangerous cases, but + # it at least catches the most obvious situation. + # This particular heuristic is overzealous; it detects ANY sizeof, instead of + # only the sizeof(dest) (where dest is given in hit.parameters[1]). + # However, there aren't many other likely candidates for sizeof; some people + # use it to capture just the length of the source, but this is just as dangerous, + # since then it absolutely does NOT take care of the destination maximum length + # in general. It also detects if a constant is given as a length, if the + # constant follows common C naming rules. + length_text=hit.parameters[3] + if p_dangerous_strncat.search(length_text) or p_looks_like_constant.search(length_text): + hit.level = 5 + hit.note = ( "Risk is high; the length parameter appears to be a constant, " + + "instead of computing the number of characters left.") + add_warning(hit) + return + c_buffer(hit) + +def c_printf(hit): + format_position = hit.format_position + if format_position <= len(hit.parameters)-1: + # Assume that translators are trusted to not insert "evil" formats: + source = strip_i18n(hit.parameters[format_position]) + if c_constant_string(source): + # Parameter is constant, so there's no risk of format string problems. + if hit.name == "snprintf" or hit.name == "vsnprintf": + hit.level = 1 + hit.warning = \ + "On some very old systems, snprintf is incorrectly implemented " \ + "and permits buffer overflows; there are also incompatible " \ + "standard definitions of it" + hit.suggestion = "Check it during installation, or use something else" + hit.category = "port" + else: + # We'll pass it on, just in case it's needed, but at level 0 risk. + hit.level = 0 + hit.note = "Constant format string, so not considered very risky (there's some residual risk, especially in a loop)." + add_warning(hit) + + +p_dangerous_sprintf_format = re.compile(r'%-?([0-9]+|\*)?s') + +# sprintf has both buffer and format vulnerabilities. +def c_sprintf(hit): + source_position = hit.source_position + if source_position <= len(hit.parameters)-1: + source=hit.parameters[source_position] + if c_singleton_string(source): + hit.level = 1 + hit.note = "Risk is low because the source is a constant character." + else: + source = strip_i18n(source) + if c_constant_string(source): + if not p_dangerous_sprintf_format.search(source): + hit.level = max( hit.level - 2, 1) + hit.note = "Risk is low because the source has a constant maximum length." + # otherwise, warn of potential buffer overflow (the default) + else: + # Ho ho - a nonconstant format string - we have a different problem. + hit.warning = "Potential format string problem" + hit.suggestion = "Make format string constant" + hit.level = 4 + hit.category = "format" + hit.url = "" + add_warning(hit) + +p_dangerous_scanf_format = re.compile(r'%s') +p_low_risk_scanf_format = re.compile(r'%[0-9]+s') + +def c_scanf(hit): + format_position = hit.format_position + if format_position <= len(hit.parameters)-1: + # Assume that translators are trusted to not insert "evil" formats; + # it's not clear that translators will be messing with INPUT formats, + # but it's possible so we'll account for it. + source = strip_i18n(hit.parameters[format_position]) + if c_constant_string(source): + if p_dangerous_scanf_format.search(source): pass # Accept default. + elif p_low_risk_scanf_format.search(source): + # This is often okay, but sometimes extremely serious. + hit.level = 1 + hit.warning = "it's unclear if the %s limit in the format string is small enough" + hit.suggestion = "Check that the limit is sufficiently small, or use a different input function" + else: + # No risky scanf request. + # We'll pass it on, just in case it's needed, but at level 0 risk. + hit.level = 0 + hit.note = "No risky scanf format detected." + else: + # Format isn't a constant. + hit.note = "If the scanf format is influenceable by an attacker, it's exploitable." + add_warning(hit) + + +p_dangerous_multi_byte = re.compile(r'^\s*sizeof\s*(\(\s*)?[A-Za-z_$0-9]+' + + r'\s*(\)\s*)?(-\s*1\s*)?$') +p_safe_multi_byte = re.compile(r'^\s*sizeof\s*(\(\s*)?[A-Za-z_$0-9]+\s*(\)\s*)?' + + r'/\s*sizeof\s*\(\s*?[A-Za-z_$0-9]+\s*' + + r'\[\s*0\s*\]\)\s*(-\s*1\s*)?$') + +def c_multi_byte_to_wide_char(hit): + # Unfortunately, this doesn't detect bad calls when it's a #define or constant + # set by a sizeof(), but trying to do so would create FAR too many false positives. + if len(hit.parameters)-1 >= 6: + num_chars_to_copy=hit.parameters[6] + if p_dangerous_multi_byte.search(num_chars_to_copy): + hit.level = 5 + hit.note = ("Risk is high, it appears that the size is given as bytes, but the " + + "function requires size as characters.") + elif p_safe_multi_byte.search(num_chars_to_copy): + # This isn't really risk-free, since it might not be the destination, or the + # destination might be a character array (if it's a char pointer, the pattern + # is actually quite dangerous, but programmers are unlikely to make that error). + hit.level = 1 + hit.note = "Risk is very low, the length appears to be in characters not bytes." + add_warning(hit) + +p_null_text = re.compile(r'^ *(NULL|0|0x0) *$') + +def c_hit_if_null(hit): + null_position = hit.check_for_null + if null_position <= len(hit.parameters)-1: + null_text=hit.parameters[null_position] + if p_null_text.search(null_text): + add_warning(hit) + else: + return + add_warning(hit) # If insufficient # of parameters. + +p_static_array = re.compile(r'^[A-Za-z_]+\s+[A-Za-z0-9_$,\s\*()]+\[[^]]') + +def c_static_array(hit): + # This is cheating, but it does the job for most real code. + # In some cases it will match something that it shouldn't. + # We don't match ALL arrays, just those of certain types (e.g., char). + # In theory, any array can overflow, but in practice it seems that + # certain types are far more prone to problems, so we just report those. + if p_static_array.search(hit.lookahead): + add_warning(hit) # Found a static array, warn about it. + +def normal(hit): + add_warning(hit) + + +# "c_ruleset": the rules for identifying "hits" in C (potential warnings). +# It's a dictionary, where the key is the function name causing the hit, +# and the value is a tuple with the following format: +# (hook, level, warning, suggestion, category, {other}) +# See the definition for class "Hit". +# The key can have multiple values separated with "|". + +c_ruleset = { + "strcpy" : + (c_buffer, 4, + "Does not check for buffer overflows when copying to destination", + "Consider using strncpy or strlcpy (warning, strncpy is easily misused)", + "buffer", "", {}), + "lstrcpy|wcscpy|_tcscpy|_mbscpy" : + (c_buffer, 4, + "Does not check for buffer overflows when copying to destination", + "Consider using a function version that stops copying at the end of the buffer", + "buffer", "", {}), + "memcpy|CopyMemory|bcopy" : + (normal, 2, # I've found this to have a lower risk in practice. + "Does not check for buffer overflows when copying to destination", + "Make sure destination can always hold the source data", + "buffer", "", {}), + "strcat" : + (c_buffer, 4, + "Does not check for buffer overflows when concatenating to destination", + "Consider using strncat or strlcat (warning, strncat is easily misused)", + "buffer", "", {}), + "lstrcat|wcscat|_tcscat|_mbscat" : + (c_buffer, 4, + "Does not check for buffer overflows when concatenating to destination", + "", + "buffer", "", {}), + "strncpy" : + (c_buffer, + 1, # Low risk level, because this is often used correctly when FIXING security + # problems, and raising it to a higher risk level would cause many false positives. + "Easily used incorrectly; doesn't always \\0-terminate or " + + "check for invalid pointers", + "", + "buffer", "", {}), + "lstrcpyn|wcsncpy|_tcsncpy|_mbsnbcpy" : + (c_buffer, + 1, # Low risk level, because this is often used correctly when FIXING security + # problems, and raising it to a higher risk levle would cause many false positives. + "Easily used incorrectly; doesn't always \\0-terminate or " + + "check for invalid pointers", + "", + "buffer", "", {}), + "strncat" : + (c_strncat, + 1, # Low risk level, because this is often used correctly when + # FIXING security problems, and raising it to a + # higher risk level would cause many false positives. + "Easily used incorrectly (e.g., incorrectly computing the correct maximum size to add)", + "Consider strlcat or automatically resizing strings", + "buffer", "", {}), + "lstrcatn|wcsncat|_tcsncat|_mbsnbcat" : + (c_strncat, + 1, # Low risk level, because this is often used correctly when FIXING security + # problems, and raising it to a higher risk level would cause many false positives. + "Easily used incorrectly (e.g., incorrectly computing the correct maximum size to add)", + "Consider strlcat or automatically resizing strings", + "buffer", "", {}), + "strccpy|strcadd": + (normal, 1, + "Subject to buffer overflow if buffer is not as big as claimed", + "Ensure that destination buffer is sufficiently large", + "buffer", "", {}), + "char|TCHAR|wchar_t": # This isn't really a function call, but it works. + (c_static_array, 2, + "Statically-sized arrays can be overflowed", + ("Perform bounds checking, use functions that limit length, " + + "or ensure that the size is larger than the maximum possible length"), + "buffer", "", {'extract_lookahead' : 1}), + + "gets|_getts": + (normal, 5, "Does not check for buffer overflows", + "Use fgets() instead", "buffer", "", {'input' : 1}), + + # The "sprintf" hook will raise "format" issues instead if appropriate: + "sprintf|vsprintf|swprintf|vswprintf|_stprintf|_vstprintf": + (c_sprintf, 4, + "Does not check for buffer overflows", + "Use snprintf or vsnprintf", + "buffer", "", {}), + + # TODO: Add "wide character" versions of these functions. + "printf|vprintf|vwprintf|vfwprintf|_vtprintf": + (c_printf, 4, + "If format strings can be influenced by an attacker, they can be exploited", + "Use a constant for the format specification", + "format", "", {}), + + "fprintf|vfprintf|_ftprintf|_vftprintf": + (c_printf, 4, + "If format strings can be influenced by an attacker, they can be exploited", + "Use a constant for the format specification", + "format", "", { 'format_position' : 2}), + + # The "syslog" hook will raise "format" issues. + "syslog": + (c_printf, 4, + "If syslog's format strings can be influenced by an attacker, " + + "they can be exploited", + "Use a constant format string for syslog", + "format", "", { 'format_position' : 2} ), + + "snprintf|vsnprintf|_snprintf|_sntprintf|_vsntprintf": + (c_printf, 4, + "If format strings can be influenced by an attacker, they can be " + + "exploited, and note that sprintf variations do not always \\0-terminate", + "Use a constant for the format specification", + "format", "", { 'format_position' : 3}), + + "scanf|vscanf|wscanf|_tscanf": + (c_scanf, 4, + "The scanf() family's %s operation, without a limit specification, " + + "permits buffer overflows", + "Specify a limit to %s, or use a different input function", + "buffer", "", {'input' : 1}), + + "fscanf|sscanf|vsscanf|vfscanf|_ftscanf": + (c_scanf, 4, + "The scanf() family's %s operation, without a limit specification, " + "permits buffer overflows", + "Specify a limit to %s, or use a different input function", + "buffer", "", {'input' : 1, 'format_position' : 2}), + + "strlen|wcslen|_tcslen|_mbslen" : + (normal, + 1, # Often this isn't really a risk, and even when, it usually at worst causes + # program crash (and nothing worse). + "Does not handle strings that are not \\0-terminated (it could cause a crash " + + "if unprotected)", + "", + "buffer", "", {}), + + "MultiByteToWideChar" : # Windows + (c_multi_byte_to_wide_char, + 2, # Only the default - this will be changed in many cases. + "Requires maximum length in CHARACTERS, not bytes", + "", + "buffer", "", {}), + + "streadd|strecpy": + (normal, 4, + "This function does not protect against buffer overflows", + "Ensure the destination has 4 times the size of the source, to leave room for expansion", + "buffer", "dangers-c", {}), + + "strtrns": + (normal, 3, + "This function does not protect against buffer overflows", + "Ensure that destination is at least as long as the source", + "buffer", "dangers-c", {}), + + "realpath": + (normal, 3, + "This function does not protect against buffer overflows, " + + "and some implementations can overflow internally", + "Ensure that the destination buffer is at least of size MAXPATHLEN, and" + + "to protect against implementation problems, the input argument should also " + + "be checked to ensure it is no larger than MAXPATHLEN", + "buffer", "dangers-c", {}), + + "getopt|getopt_long": + (normal, 3, + "Some older implementations do not protect against internal buffer overflows ", + "Check implementation on installation, or limit the size of all string inputs", + "buffer", "dangers-c", {'input' : 1}), + + "getpass": + (normal, 3, + "Some implementations may overflow buffers", + "", + "buffer", "dangers-c", {'input' : 1}), + + "getwd": + (normal, 3, + "This does not protect against buffer overflows " + "by itself, so use with caution", + "Use getcwd instead", + "buffer", "dangers-c", {'input' : 1}), + + # fread not included here; in practice I think it's rare to mistake it. + "getchar|fgetc|getc|read|_gettc": + (normal, 1, + "Check buffer boundaries if used in a loop", # loops may be via recursion, too. + "", + "buffer", "dangers-c", {'input' : 1}), + + "access": # ???: TODO: analyze TOCTOU more carefully. + (normal, 4, + "This usually indicates a security flaw. If an " + + "attacker can change anything along the path between the " + + "call to access() and the file's actual use (e.g., by moving " + + "files), the attacker can exploit the race condition", + "Set up the correct permissions (e.g., using setuid()) and " + + "try to open the file directly", + "race", + "avoid-race#atomic-filesystem", {}), + "chown": + (normal, 5, + "This accepts filename arguments; if an attacker " + + "can move those files, a race condition results. ", + "Use fchown( ) instead", + "race", "", {}), + "chgrp": + (normal, 5, + "This accepts filename arguments; if an attacker " + + "can move those files, a race condition results. ", + "Use fchgrp( ) instead", + "race", "", {}), + "chmod": + (normal, 5, + "This accepts filename arguments; if an attacker " + + "can move those files, a race condition results. ", + "Use fchmod( ) instead", + "race", "", {}), + "vfork": + (normal, 2, + "On some old systems, vfork() permits race conditions, and it's " + + "very difficult to use correctly", + "Use fork() instead", + "race", "", {}), + "readlink": + (normal, 5, + "This accepts filename arguments; if an attacker " + + "can move those files or change the link content, " + + "a race condition results. Also, it does not terminate with ASCII NUL", + # This is often just a bad idea, and it's hard to suggest a + # simple alternative: + "Reconsider approach", + "race", "", {'input' : 1}), + + "tmpfile": + (normal, 2, + "Function tmpfile() has a security flaw on some systems (e.g., older System V systems)", + "", + "tmpfile", "", {}), + "tmpnam|tempnam": + (normal, 3, + "Temporary file race condition", + "", + "tmpfile", "avoid-race", {}), + + # TODO: Detect GNOME approach to mktemp and ignore it. + "mktemp": + (normal, 4, + "Temporary file race condition", + "", + "tmpfile", "avoid-race", {}), + + "mkstemp": + (normal, 2, + "Potential for temporary file vulnerability in some circumstances. Some older Unix-like systems create temp files with permission to write by all by default, so be sure to set the umask to override this. Also, some older Unix systems might fail to use O_EXCL when opening the file, so make sure that O_EXCL is used by the library", + "", + "tmpfile", "avoid-race", {}), + + "fopen|open": + (normal, 2, + "Check when opening files - can an attacker redirect it (via symlinks), force the opening of special file type (e.g., device files), move things around to create a race condition, control its ancestors, or change its contents?", + "", + "misc", "", {}), + + "umask": + (normal, 1, + "Ensure that umask is given most restrictive possible setting (e.g., 066 or 077)", + "", + "access", "", {}), + + # Windows. TODO: Detect correct usage approaches and ignore it. + "GetTempFileName": + (normal, 3, + "Temporary file race condition in certain cases " + + "(e.g., if run as SYSTEM in many versions of Windows)", + "", + "tmpfile", "avoid-race", {}), + + # TODO: Need to detect varying levels of danger. + "execl|execlp|execle|execv|execvp|system|popen|WinExec|ShellExecute": + (normal, 4, + "This causes a new program to execute and is difficult to use safely", + "try using a library call that implements the same functionality " + + "if available", + "shell", "", {}), + + # TODO: Need to detect varying levels of danger. + "execl|execlp|execle|execv|execvp|system|popen|WinExec|ShellExecute": + (normal, 4, + "This causes a new program to execute and is difficult to use safely", + "try using a library call that implements the same functionality " + + "if available", + "shell", "", {}), + + # TODO: Be more specific. The biggest problem involves "first" param NULL, + # second param with embedded space. Windows. + "CreateProcessAsUser|CreateProcessWithLogon": + (normal, 3, + "This causes a new process to execute and is difficult to use safely", + "Especially watch out for embedded spaces", + "shell", "", {}), + + # TODO: Be more specific. The biggest problem involves "first" param NULL, + # second param with embedded space. Windows. + "CreateProcess": + (c_hit_if_null, 3, + "This causes a new process to execute and is difficult to use safely", + "Specify the application path in the first argument, NOT as part of the second, " + + "or embedded spaces could allow an attacker to force a different program to run", + "shell", "", {'check_for_null' : 1}), + + # Random values. Don't trigger on "initstate", it's too common a term. + "drand48|erand48|jrand48|lcong48|lrand48|mrand48|nrand48|random|seed48|setstate|srand|strfry|srandom": + (normal, 3, + "This function is not sufficiently random for security-related functions such as key and nonce creation", + "use a more secure technique for acquiring random values", + "random", "", {}), + + "crypt": + (normal, 4, + "Function crypt is a poor one-way hashing algorithm; since it only accepts passwords of 8 " + + "characters or less, and only a two-byte salt, it is excessively vulnerable to " + + "dictionary attacks given today's faster computing equipment", + "Use a different algorithm, such as SHA-1, with a larger non-repeating salt", + "crypto", "", {}), + + # OpenSSL EVP calls to use DES. + "EVP_des_ecb|EVP_des_cbc|EVP_des_cfb|EVP_des_ofb|EVP_desx_cbc": + (normal, 4, + "DES only supports a 56-bit keysize, which is too small given today's computers", + "Use a different patent-free encryption algorithm with a larger keysize, " + + "such as 3DES or AES", + "crypto", "", {}), + + # Other OpenSSL EVP calls to use small keys. + "EVP_rc4_40|EVP_rc2_40_cbc|EVP_rc2_64_cbc": + (normal, 4, + "These keysizes are too small given today's computers", + "Use a different patent-free encryption algorithm with a larger keysize, " + + "such as 3DES or AES", + "crypto", "", {}), + + "chroot": + (normal, 3, + "chroot can be very helpful, but is hard to use correctly", + "Make sure the program immediately chdir(\"/\"), closes file descriptors, " + + "and drops root privileges, and that all necessary files (and no more!) are " + + "in the new root", + "misc", "", {}), + + "getenv|curl_getenv": + (normal, 3, "Environment variables are untrustable input if they can be" + "it returns untrustable input if the environment can be" + + "set by an attacker. It can have any content and length, " + + "and the same variable can be set more than once", + "Check environment variables carefully before using them", + "buffer", "", {'input' : 1}), + + "g_get_home_dir": + (normal, 3, "This function is synonymous with 'getenv(\"HOME\")';" + + "it returns untrustable input if the environment can be" + + "set by an attacker. It can have any content and length, " + + "and the same variable can be set more than once", + "Check environment variables carefully before using them", + "buffer", "", {'input' : 1}), + + "g_get_tmp_dir": + (normal, 3, "This function is synonymous with 'getenv(\"TMP\")';" + + "it returns untrustable input if the environment can be" + + "set by an attacker. It can have any content and length, " + + "and the same variable can be set more than once", + "Check environment variables carefully before using them", + "buffer", "", {'input' : 1}), + + + # These are Windows-unique: + + # TODO: Should have lower risk if the program checks return value. + "RpcImpersonateClient|ImpersonateLoggedOnUser|CoImpersonateClient|" + + "ImpersonateNamedPipeClient|ImpersonateDdeClientWindow|ImpersonateSecurityContext|" + + "SetThreadToken": + (normal, 4, "If this call fails, the program could fail to drop heightened privileges", + "Make sure the return value is checked, and do not continue if a failure is reported", + "access", "", {}), + + "InitializeCriticalSection": + (normal, 3, "Exceptions can be thrown in low-memory situations", + "Use InitializeCriticalSectionAndSpinCount instead", + "misc", "", {}), + + "EnterCriticalSection": + (normal, 3, "On some versions of Windows, exceptions can be thrown in low-memory situations", + "Use InitializeCriticalSectionAndSpinCount instead", + "misc", "", {}), + + "LoadLibrary|LoadLibraryEx": + (normal, 3, "Ensure that the full path to the library is specified, or current directory may be used", + "Use registry entry or GetWindowsDirectory to find library path, if you aren't already", + "misc", "", {'input' : 1}), + + "SetSecurityDescriptorDacl": + (c_hit_if_null, 5, + "Never create NULL ACLs; an attacker can set it to Everyone (Deny All Access), " + + "which would even forbid administrator access", + "", + "misc", "", {'check_for_null' : 3}), + + "AddAccessAllowedAce": + (normal, 3, + "This doesn't set the inheritance bits in the access control entry (ACE) header", + "Make sure that you set inheritance by hand if you wish it to inherit", + "misc", "", {}), + + "getlogin": + (normal, 4, + "It's often easy to fool getlogin. Sometimes it does not work at all, because some program messed up the utmp file. Often, it gives only the first 8 characters of the login name. The user currently logged in on the controlling tty of our program need not be the user who started it. Avoid getlogin() for security-related purposes", + "Use getpwuid(geteuid()) and extract the desired information instead", + "misc", "", {}), + + "cuserid": + (normal, 4, + "Exactly what cuserid() does is poorly defined (e.g., some systems use the effective uid, like Linux, while others like System V use the real uid). Thus, you can't trust what it does. It's certainly not portable (The cuserid function was included in the 1988 version of POSIX, but removed from the 1990 version). Also, if passed a non-null parameter, there's a risk of a buffer overflow if the passed-in buffer is not at least L_cuserid characters long", + "Use getpwuid(geteuid()) and extract the desired information instead", + "misc", "", {}), + + "getpw": + (normal, 4, + "This function is dangerous; it may overflow the provided buffer. It extracts data from a 'protected' area, but most systems have many commands to let users modify the protected area, and it's not always clear what their limits are. Best to avoid using this function altogether", + "Use getpwuid() instead", + "buffer", "", {}), + + "getpass": + (normal, 4, + "This function is obsolete and not portable. It was in SUSv2 but removed by POSIX.2. What it does exactly varies considerably between systems, particularly in where its prompt is displayed and where it gets its data (e.g., /dev/tty, stdin, stderr, etc.)", + "Make the specific calls to do exactly what you want. If you continue to use it, or write your own, be sure to zero the password as soon as possible to avoid leaving the cleartext password visible in the process' address space", + "misc", "", {}), + + "gsignal|ssignal": + (normal, 2, + "These functions are considered obsolete on most systems, and very non-poertable (Linux-based systems handle them radically different, basically if gsignal/ssignal were the same as raise/signal respectively, while System V considers them a separate set and obsolete)", + "Switch to raise/signal, or some other signalling approach", + "obsolete", "", {}), + + "memalign": + (normal, 1, + "On some systems (though not Linux-based systems) an attempt to free() results from memalign() may fail. This may, on a few systems, be exploitable. Also note that memalign() may not check that the boundary parameter is correct", + "Use posix_memalign instead (defined in POSIX's 1003.1d). Don't switch to valloc(); it is marked as obsolete in BSD 4.3, as legacy in SUSv2, and is no longer defined in SUSv3. In some cases, malloc()'s alignment may be sufficient", + "free", "", {}), + + "ulimit": + (normal, 1, + "This C routine is considered obsolete (as opposed to the shell command by the same name, which is NOT obsolete)", + "Use getrlimit(2), setrlimit(2), and sysconf(3) instead", + "obsolete", "", {}), + + "usleep": + (normal, 1, + "This C routine is considered obsolete (as opposed to the shell command by the same name). The interaction of this function with SIGALRM and other timer functions such as sleep(), alarm(), setitimer(), and nanosleep() is unspecified", + "Use nanosleep(2) or setitimer(2) instead", + "obsolete", "", {}), + + + # Input functions, useful for -I + "recv|recvfrom|recvmsg|fread|readv": + (normal, 0, "Function accepts input from outside program", + "Make sure input data is filtered, especially if an attacker could manipulate it", + "input", "", {'input' : 1}), + + + # TODO: detect C++'s: cin >> charbuf, where charbuf is a char array; the problem + # is that flawfinder doesn't have type information, and ">>" is safe with + # many other types. + # ("send" and friends aren't todo, because they send out.. not input.) + # TODO: cwd("..") in user's space - TOCTOU vulnerability + # TODO: There are many more rules to add, esp. for TOCTOU. + } + +template_ruleset = { + # This is a template for adding new entries (the key is impossible): + "9": + (normal, 2, + "", + "", + "tmpfile", "", {}), + } + + +def find_column(text, position): + "Find column number inside line." + newline = string.rfind(text, "\n", 0, position) + if newline == -1: + return position + 1 + else: + return position - newline + +def get_context(text, position): + "Get surrounding text line starting from text[position]" + linestart = string.rfind(text, "\n", 0, position+1) + 1 + lineend = string.find(text, "\n", position, len(text)) + if lineend == -1: lineend = len(text) + return text[linestart:lineend] + +def c_valid_match(text, position): + # Determine if this is a valid match, or a false positive. + # If false positive controls aren't on, always declare it's a match: + i = position + while i < len(text): + c = text[i] + if c == '(': return 1 + elif c in string.whitespace: i = i + 1 + else: + if falsepositive: return 0 # No following "(", presume invalid. + if c in "=+-": + # This is very unlikely to be a function use. If c is '=', + # the name is followed by an assignment or is-equal operation. + # Since the names of library functions are really unlikely to be + # followed by an assignment statement or 'is-equal' test, + # while this IS common for variable names, let's declare it invalid. + # It's possible that this is a variable function pointer, pointing + # to the real library function, but that's really improbable. + # If c is "+" or "-", we have a + or - operation. + # In theory "-" could be used for a function pointer difference + # computation, but this is extremely improbable. + # More likely: this is a variable in a computation, so drop it. + return 0 + return 1 + return 0 # Never found anything other than "(" and whitespace. + +def process_directive(): + "Given a directive, process it." + global ignoreline, num_ignored_hits + # TODO: Currently this is just a stub routine that simply removes + # hits from the current line, if any, and sets a flag if not. + # Thus, any directive is considered the "ignore" directive. + # Currently that's okay because we don't have any other directives yet. + if never_ignore: return + hitfound = 0 + # Iterate backwards over hits, to be careful about the destructive iterator + for i in xrange(len(hitlist)-1, -1, -1): + if hitlist[i].line == linenumber: + del hitlist[i] # DESTROY - this is a DESTRUCTIVE iterator. + hitfound = 1 # Don't break, because there may be more than one. + num_ignored_hits = num_ignored_hits + 1 + if not hitfound: + ignoreline = linenumber + 1 # Nothing found - ignore next line. + +# Characters that can be in a string. +# 0x4, 4.4e4, etc. +numberset=string.hexdigits+"_x.Ee" + +# Patterns for various circumstances: +p_include = re.compile( r'#\s*include\s+(<.*?>|".*?")' ) +p_digits = re.compile( r'[0-9]' ) +p_alphaunder = re.compile( r'[A-Za-z_]' ) # Alpha chars and underline. +# A "word" in C. Note that "$" is permitted -- it's not permitted by the +# C standard in identifiers, but gcc supports it as an extension. +p_c_word = re.compile( r'[A-Za-z_][A-Za-z_0-9$]*' ) +# We'll recognize ITS4 and RATS ignore directives, as well as our own, +# for compatibility's sake: +p_directive = re.compile( r'(?i)\s*(ITS4|Flawfinder|RATS):\s*([^\*]*)' ) + +max_lookahead=500 # Lookahead limit for c_static_array. + +def process_c_file(f): + global filename, linenumber, ignoreline, sumlines, num_links_skipped + global sloc + filename=f + linenumber = 1 + ignoreline = -1 + + incomment = 0 + instring = 0 + linebegin = 1 + codeinline = 0 # 1 when we see some code (so increment sloc at newline) + + if f == "-": + input = sys.stdin + else: + # This should never happen. + if ((not allowlink) and os.path.islink(f)): + print "BUG! Somehow got a symlink in process_c_file!" + num_links_skipped = num_links_skipped + 1 + return + input = open(f, "r") + + # Read ENTIRE file into memory. Use readlines() to convert \n if necessary. + # This turns out to be very fast in Python, even on large files, and it + # eliminates lots of range checking later, making the result faster. + # We're examining source files, and today, it would be EXTREMELY bad practice + # to create source files larger than main memory space. + # Better to load it all in, and get the increased speed and reduced + # development time that results. + + if not quiet: + if output_format: + print "Examining", h(f), "
    " + else: + print "Examining", f + sys.stdout.flush() + + text = string.join(input.readlines(),"") + + i = 0 + while i < len(text): + # This is a trivial tokenizer that just tries to find "words", which + # match [A-Za-z_][A-Za-z0-9_]*. It skips comments & strings. + # It also skips "#include <...>", which must be handled specially + # because "<" and ">" aren't usually delimiters. + # It doesn't bother to tokenize anything else, since it's not used. + # The following is a state machine with 3 states: incomment, instring, + # and "normal", and a separate state "linebegin" if at BOL. + c = text[i] + if linebegin: # If at beginning of line, see if #include is there. + linebegin = 0 + if c == "#": codeinline = 1 # A directive, count as code. + m = p_include.match(text,i) + if m: # Found #include, skip it. Otherwise: #include + i = m.end(0) + continue + if c == "\n": + linenumber = linenumber + 1 + sumlines = sumlines + 1 + linebegin = 1 + if codeinline: sloc = sloc + 1 + codeinline = 0 + i = i +1 + continue + i = i + 1 # From here on, text[i] points to next character. + # Skip whitespace: + if (c == " ") or (c == "\t") or (c == "\v") or (c == "\f"): continue + if i < len(text): nextc = text[i] + else: nextc = '' + if incomment: + if c=='*' and nextc=='/': + i = i + 1 + incomment = 0 + elif instring: + if c == '\\': i = i + 1 + elif c == '"' and instring == 1: instring = 0 + elif c == "'" and instring == 2: instring = 0 + else: + if c=='/' and nextc=='*': + m = p_directive.match(text, i+1) # Is there a directive here? + if m: + process_directive() + i = i + 1 + incomment = 1 + elif c=='/' and nextc=='/': # "//" comments - skip to EOL. + m = p_directive.match(text, i+1) # Is there a directive here? + if m: + process_directive() + while i" + if list_rules: + display_ruleset(c_ruleset) + sys.exit(0) + + +# Show the header, but only if it hasn't been shown yet. +def display_header(): + global displayed_header + if not showheading: return + if not displayed_header: + if output_format: + print ('') + print "" + print "" + print '' + print "Flawfinder Results" + print '' + print '' + print "" + print "" + print "

    Flawfinder Results

    " + print "Here are the security scan results from" + print 'Flawfinder version %s,' % version + print '(C) 2001-2004 David A. Wheeler.' + else: + print "Flawfinder version %s, (C) 2001-2004 David A. Wheeler." % version + displayed_header = 1 + + +c_extensions = { '.c' : 1, '.h' : 1, + '.ec': 1, '.ecp': 1, # Informix embedded C. + '.pgc': 1, # Postgres embedded C. + '.C': 1, '.cpp': 1, '.CPP': 1, '.cxx': 1, '.cc': 1, '.CC' : 1, # C++. + '.pcc': 1, # Oracle C++ + '.hpp': 1, '.H' : 1, # .h - usually C++. + } + + +def maybe_process_file(f): + # process f, but only if (1) it's a directory (so we recurse), or + # (2) it's source code in a language we can handle. + # Currently, for files that means only C/C++, and we check if the filename + # has a known C/C++ filename extension. If it doesn't, we ignore the file. + # We accept symlinks only if allowlink is true. + global num_links_skipped + if os.path.isdir(f): + if (not allowlink) and os.path.islink(f): + if not quiet: print "Warning: skipping symbolic link directory", h(f) + num_links_skipped = num_links_skipped + 1 + return + for file in os.listdir(f): + maybe_process_file(os.path.join(f, file)) + # Now we will FIRST check if the file appears to be a C/C++ file, and + # THEN check if it's a regular file or symlink. This is more complicated, + # but I do it this way so that there won't be a lot of pointless + # warnings about skipping files we wouldn't have used anyway. + dotposition = string.rfind(f, ".") + if dotposition > 1: + extension = f[dotposition:] + if c_extensions.has_key(extension): + # Its name appears to be a C/C++ source code file. + if (not allowlink) and os.path.islink(f): + if not quiet: print "Warning: skipping symbolic link file", h(f) + num_links_skipped = num_links_skipped + 1 + elif not os.path.isfile(f): + # Skip anything not a normal file. This is so that + # device files, etc. won't cause trouble. + if not quiet: print "Warning: skipping non-regular file", h(f) + else: + process_c_file(f) + + +def process_file_args(files): + # Process the list of "files", some of which may be directories, + # which were given on the command line. + # This is handled differently than anything not found on the command line + # (i.e. through recursing into a directory) because flawfinder + # ALWAYS processes normal files given on the command line. + # This is done to give users control over what's processed; + # if a user really, really wants to analyze a file, name it! + # If user wants to process "this directory and down", just say ".". + # We handle symlinks specially, handle normal files and directories, + # and skip the rest to prevent security problems. "-" is stdin. + global num_links_skipped + for f in files: + if (not allowlink) and os.path.islink(f): + if not quiet: print "Warning: skipping symbolic link", h(f) + num_links_skipped = num_links_skipped + 1 + elif os.path.isfile(f) or f == "-": + # If on the command line, FORCE processing of it. + # Currently, we only process C/C++. + process_c_file(f) + elif os.path.isdir(f): + # At one time flawfinder used os.path.walk, but that Python + # built-in doesn't give us enough control over symbolic links. + # So, we'll walk the filesystem hierarchy ourselves: + maybe_process_file(f) + else: + if not quiet: print "Warning: skipping non-regular file", h(f) + +def usage(): + print """ +flawfinder [--help] [--context] [-c] [--columns | -C] [--html] + [--dataonly | -D] + [--minlevel=X | -m X] + [--immediate] [-i] [--inputs | -I] [--neverignore | -n] + [--quiet | -Q] [--singleline | -S ] + [--loadhitlist=F ] [ --savehitlist=F ] [ --diffhitlist=F ] + [--listrules] + [--] [ source code file or source root directory ]+ + + --help Show this usage help + + --allowlink + Allow symbolic links. + + --context + -c Show context (the line having the "hit"/potential flaw) + + --columns Show the column number (as well as the file name and + line number) of each hit; this is shown after the line number + by adding a colon and the column number in the line (the first + character in a line is column number 1). + + --html Display as HTML output. + + -m X + --minlevel=X + Set minimum risk level to X for inclusion in hitlist. This + can be from 0 (``no risk'') to 5 (``maximum risk''); the + default is 1. + + -S + --singleline Single-line output. + + --neverignore + -n Never ignore security issues, even if they have an ``ignore'' + directive in a comment. + + --immediate + -i Immediately display hits (don't just wait until the end). + + --inputs Show only functions that obtain data from outside the program; + -I this also sets minlevel to 0. + + --nolink Skip symbolic links (ignored). + + --omittime Omit time to run. + + --Q + --quiet Don't display status information (i.e., which files are being + examined) while the analysis is going on. + + --D + --dataonly Don't display the headers and footers of the analysis; + use this along with --quiet to get just the results. + + --listrules List the rules in the ruleset (rule database). + + --loadhitlist=F + Load hits from F instead of analyzing source programs. + + --savehitlist=F + Save all hits (the "hitlist") to F. + + --diffhitlist=F + Show only hits (loaded or analyzed) not in F. + + + For more information, please consult the manpage or available + documentation. +""" + +def process_options(): + global show_context, show_inputs, allowlink, omit_time + global output_format, minimum_level, show_immediately, single_line + global falsepositive + global show_columns, never_ignore, quiet, showheading, list_rules + global loadhitlist, savehitlist, diffhitlist + try: + # Note - as a side-effect, this sets sys.argv[]. + optlist, args = getopt.getopt(sys.argv[1:], "cm:nih?CSDQIF", + ["context", "minlevel=", "immediate", "inputs", "input", + "nolink", "falsepositive", "falsepositives", + "columns", "listrules", "omittime", "allowlink", + "neverignore", "quiet", "dataonly", "html", "singleline", + "loadhitlist=", "savehitlist=", "diffhitlist=", + "version", "help" ]) + for (opt,value) in optlist: + if opt == "--context" or opt == "-c": + show_context = 1 + elif opt == "--columns" or opt == "-C": + show_columns = 1 + elif opt == "--quiet" or opt == "-Q": + quiet = 1 + elif opt == "--dataonly" or opt == "-D": + showheading = 0 + elif opt == "--inputs" or opt == "--input" or opt == "-I": + show_inputs = 1 + minimum_level = 0 + elif opt == "--falsepositive" or opt == "falsepositives" or opt == "-F": + falsepositive = 1 + elif opt == "--nolink": + allowlink = 0 + elif opt == "--omittime": + omit_time = 1 + elif opt == "--allowlink": + allowlink = 1 + elif opt == "--listrules": + list_rules = 1 + elif opt == "--html": + output_format = 1 + single_line = 0 + elif opt == "--minlevel" or opt == "-m": + minimum_level = string.atoi(value) + elif opt == "--singleline" or opt == "-S": + single_line = 1 + elif opt == "--immediate" or opt == "-i": + show_immediately = 1 + elif opt == "-n" or opt == "--neverignore": + never_ignore = 1 + elif opt == "--loadhitlist": + loadhitlist = value + display_header() + if showheading: print "Loading hits from", value + elif opt == "--savehitlist": + savehitlist = value + display_header() + if showheading: print "Saving hitlist to", value + elif opt == "--diffhitlist": + diffhitlist = value + display_header() + if showheading: print "Showing hits not in", value + elif opt == "--version": + print version + sys.exit(0) + elif opt in [ '-h', '-?', '--help' ]: + usage() + sys.exit(0) + # For DOS/Windows, expand filenames; for Unix, DON'T expand them + # (the shell will expand them for us). Some sloppy Python programs + # always call "glob", but that's WRONG -- on Unix-like systems that + # will expand twice. Python doesn't have a clean way to detect + # "has globbing occurred", so this is the best I've found: + if os.name == "windows" or os.name == "nt" or os.name == "dos": + sys.argv[1:] = reduce(operator.add, map(glob.glob, args)) + else: + sys.argv[1:] = args + # In Python 2 the convention is "getopt.GetoptError", but we + # use "getopt.error" here so it's compatible with both + # Python 1.5 and Python 2. + except getopt.error, text: + print "*** getopt error:", text + usage() + sys.exit(1) + + + +def process_files(): + global hitlist + if loadhitlist: + f = open(loadhitlist) + hitlist = pickle.load(f) + else: + files = sys.argv[1:] + if not files: + print "*** No input files" + return None + process_file_args(files) + return 1 + + +def show_final_results(): + global hitlist + count = 0 + count_per_level = {} + count_per_level_and_up = {} + for i in range(0,6): # Initialize count_per_level + count_per_level[i] = 0 + for i in range(0,6): # Initialize count_per_level + count_per_level_and_up[i] = 0 + if show_immediately: # Separate the final results. + print + if showheading: + if output_format: + print "

    Final Results

    " + else: + print "FINAL RESULTS:" + print + hitlist.sort() + # Display results. The HTML format now uses + #