Skip to content
Merged
Show file tree
Hide file tree
Changes from 8 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 0 additions & 15 deletions python/ql/src/Resources/FileNotAlwaysClosed.py

This file was deleted.

28 changes: 13 additions & 15 deletions python/ql/src/Resources/FileNotAlwaysClosed.qhelp
Original file line number Diff line number Diff line change
Expand Up @@ -4,32 +4,30 @@
<qhelp>

<overview>
<p> If a file is opened then it should always be closed again, even if an
exception is raised.
Failing to ensure that all files are closed may result in failure due to too
many open files.</p>

<p>When a file is opened, it should always be closed.
</p>
<p>A file opened for writing that is not closed when the application exits may result in data loss, where not all of the data written may be saved to the file.
A file opened for reading or writing that is not closed may also use up file descriptors, which is a resource leak that in long running applications could lead to a failure to open additional files.
</p>
</overview>
<recommendation>

<p>Ensure that if you open a file it is always closed on exiting the method.
Wrap the code between the <code>open()</code> and <code>close()</code>
functions in a <code>with</code> statement or use a <code>try...finally</code>
statement. Using a <code>with</code> statement is preferred as it is shorter
and more readable.</p>
<p>Ensure that opened files are always closed, including when an exception could be raised.
The best practice is often to use a <code>with</code> statement to automatically clean up resources.
Otherwise, ensure that <code>.close()</code> is called in a <code>try...except</code> or <code>try...finally</code>
block to handle any possible exceptions.
</p>

</recommendation>
<example>
<p>The following code shows examples of different ways of closing a file. In the first example, the
file is closed only if the method is exited successfully. In the other examples, the file is always
closed on exiting the method.</p>
<p>In the following examples, in the case marked BAD, the file may not be closed if an exception is raised. In the cases marked GOOD, the file is always closed.</p>

<sample src="FileNotAlwaysClosed.py" />
<sample src="examples/FileNotAlwaysClosed.py" />

</example>
<references>


<li>Python Documentation: <a href="https://docs.python.org/3/tutorial/inputoutput.html#reading-and-writing-files">Reading and writing files</a>.</li>
<li>Python Language Reference: <a href="http://docs.python.org/reference/compound_stmts.html#the-with-statement">The with statement</a>,
<a href="http://docs.python.org/reference/compound_stmts.html#the-try-statement">The try statement</a>.</li>
<li>Python PEP 343: <a href="http://www.python.org/dev/peps/pep-0343">The "with" Statement</a>.</li>
Expand Down
69 changes: 10 additions & 59 deletions python/ql/src/Resources/FileNotAlwaysClosed.ql
Original file line number Diff line number Diff line change
@@ -1,74 +1,25 @@
/**
* @name File is not always closed
* @description Opening a file without ensuring that it is always closed may cause resource leaks.
* @description Opening a file without ensuring that it is always closed may cause data loss or resource leaks.
* @kind problem
* @tags efficiency
* correctness
* resources
* external/cwe/cwe-772
* @problem.severity warning
* @sub-severity high
* @precision medium
* @precision high
* @id py/file-not-closed
*/

import python
import FileOpen
import FileNotAlwaysClosedQuery

/**
* Whether resource is opened and closed in in a matched pair of methods,
* either `__enter__` and `__exit__` or `__init__` and `__del__`
*/
predicate opened_in_enter_closed_in_exit(ControlFlowNode open) {
file_not_closed_at_scope_exit(open) and
exists(FunctionValue entry, FunctionValue exit |
open.getScope() = entry.getScope() and
exists(ClassValue cls |
cls.declaredAttribute("__enter__") = entry and cls.declaredAttribute("__exit__") = exit
or
cls.declaredAttribute("__init__") = entry and cls.declaredAttribute("__del__") = exit
) and
exists(AttrNode attr_open, AttrNode attrclose |
attr_open.getScope() = entry.getScope() and
attrclose.getScope() = exit.getScope() and
expr_is_open(attr_open.(DefinitionNode).getValue(), open) and
attr_open.getName() = attrclose.getName() and
close_method_call(_, attrclose)
)
)
}

predicate file_not_closed_at_scope_exit(ControlFlowNode open) {
exists(EssaVariable v |
BaseFlow::reaches_exit(v) and
var_is_open(v, open) and
not file_is_returned(v, open)
)
or
call_to_open(open) and
not exists(AssignmentDefinition def | def.getValue() = open) and
not exists(Return r | r.getValue() = open.getNode())
}

predicate file_not_closed_at_exception_exit(ControlFlowNode open, ControlFlowNode exit) {
exists(EssaVariable v |
exit.(RaisingNode).viableExceptionalExit(_, _) and
not closes_arg(exit, v.getSourceVariable()) and
not close_method_call(exit, v.getAUse().(NameNode)) and
var_is_open(v, open) and
v.getAUse() = exit.getAChild*()
)
}

/* Check to see if a file is opened but not closed or returned */
from ControlFlowNode defn, string message
from FileOpen fo, string msg
where
not opened_in_enter_closed_in_exit(defn) and
(
file_not_closed_at_scope_exit(defn) and message = "File is opened but is not closed."
or
not file_not_closed_at_scope_exit(defn) and
file_not_closed_at_exception_exit(defn, _) and
message = "File may not be closed if an exception is raised."
)
select defn.getNode(), message
fileNotClosed(fo) and
msg = "File is opened but is not closed."
or
fileMayNotBeClosedOnException(fo, _) and
msg = "File may not be closed if an exception is raised."
select fo, msg
139 changes: 139 additions & 0 deletions python/ql/src/Resources/FileNotAlwaysClosedQuery.qll
Original file line number Diff line number Diff line change
@@ -0,0 +1,139 @@
/** Definitions for reasoning about whether files are closed. */

import python
import semmle.python.dataflow.new.internal.DataFlowDispatch
import semmle.python.ApiGraphs

/** A CFG node where a file is opened. */
abstract class FileOpenSource extends DataFlow::CfgNode { }

/** A call to the builtin `open` or `os.open`. */
class FileOpenCall extends FileOpenSource {
FileOpenCall() {
this = [API::builtin("open").getACall(), API::moduleImport("os").getMember("open").getACall()]
}
}

private DataFlow::TypeTrackingNode fileOpenInstance(DataFlow::TypeTracker t) {
t.start() and
result instanceof FileOpenSource
or
exists(DataFlow::TypeTracker t2 | result = fileOpenInstance(t2).track(t2, t))
}

/**
* A call that returns an instance of an open file object.
* This includes calls to methods that transitively call `open` or similar.
*/
class FileOpen extends DataFlow::CallCfgNode {
FileOpen() { fileOpenInstance(DataFlow::TypeTracker::end()).flowsTo(this) }
}

/** A call that may wrap a file object in a wrapper class or `os.fdopen`. */
class FileWrapperCall extends DataFlow::CallCfgNode {
DataFlow::Node wrapped;

FileWrapperCall() {
wrapped = this.getArg(_).getALocalSource() and
this.getFunction() = classTracker(_)
or
wrapped = this.getArg(0) and
this = API::moduleImport("os").getMember("fdopen").getACall()
or
wrapped = this.getArg(0) and
this = API::moduleImport("django").getMember("http").getMember("FileResponse").getACall()
}

/** Gets the file that this call wraps. */
DataFlow::Node getWrapped() { result = wrapped }
}

/** A node where a file is closed. */
abstract class FileClose extends DataFlow::CfgNode {
/** Holds if this file close will occur if an exception is thrown at `e`. */
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

By e do you mean raises?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes; I had changed the name at some point and missed that comment.

predicate guardsExceptions(DataFlow::CfgNode raises) {
this.asCfgNode() = raises.asCfgNode().getAnExceptionalSuccessor().getASuccessor*()
or
// The expression is after the close call.
// This also covers the body of a `with` statement.
raises.asCfgNode() = this.asCfgNode().getASuccessor*()
}
}

/** A call to the `.close()` method of a file object. */
class FileCloseCall extends FileClose {
FileCloseCall() { exists(DataFlow::MethodCallNode mc | mc.calls(this, "close")) }
}

/** A call to `os.close`. */
class OsCloseCall extends FileClose {
OsCloseCall() { this = API::moduleImport("os").getMember("close").getACall().getArg(0) }
}

/** A `with` statement. */
class WithStatement extends FileClose {
With w;

WithStatement() { this.asExpr() = w.getContextExpr() }
}

/** Holds if an exception may be raised at `raises` if `file` is a file object. */
private predicate mayRaiseWithFile(DataFlow::CfgNode file, DataFlow::CfgNode raises) {
// Currently just consider any method called on `node`; e.g. `file.write()`; as potentially raising an exception
raises.(DataFlow::MethodCallNode).getObject() = file and
not file instanceof FileOpen and
not file instanceof FileClose
}

/** Holds if data flows from `nodeFrom` to `nodeTo` in one step that also includes file wrapper classes. */
private predicate fileLocalFlowStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
DataFlow::localFlowStep(nodeFrom, nodeTo)
or
exists(FileWrapperCall fw | nodeFrom = fw.getWrapped() and nodeTo = fw)
}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This extension of local flow makes me wonder if it would make more sense to rewrite this part of the query as a proper data-flow query (with an additional step for file wrapper calls). My main worry is that calculating the fileLocalFlow relation might result in bad performance.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hm, full dataflow could improve precison on the trickier cases like the file being closed through a wrapper. I wasn't sure of the performance impact compared to local reasoning, however.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

My main worry about using global dataflow would be if there were a lot of sources, but my gut feeling is that there probably aren't that many open calls in your average codebase. You probably have a better intuition for this than me, though.


/** Holds if data flows from `source` to `sink`, including file wrapper classes. */
private predicate fileLocalFlow(DataFlow::Node source, DataFlow::Node sink) {
fileLocalFlowStep*(source, sink)
}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Unless I've misread the rest of the code, source will in fact always be a FileOpen instance. If this is true, it might make sense to specialise the argument to that class (a kind of manual "magic"), as this would certainly reduce the size of the fileLocalFlow predicate (assuming the compiler hasn't already figured that this kind of magic is available).

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It looks like this change caused DCA to timeout on FreeCAD. I'm not sure why, but I'll revert it for now.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Okay, that's very concerning. There is a reason why the definition of localStep is marked as pragma[inline]. I'm not entirely comfortable with this query (potentially) materialising an even bigger predicate.

In this case, I think there might be a better solution: restrict the fileLocalFlowStep relation to LocalSourceNodes. I think this should work, since FileOpen must be a LocalSourceNode, and so must the wrappers (as they are CallCfgNodes). Of course, the external interface to fileLocalFlow must then use flowsTo to get to the final sink node. And of course, the step from a LocalSourceNode to its FileWrapperCall should also use flowsTo.

Does the above strategy make sense to you? If not, let me know and we can have a look at it together. 🙂

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Implemented this.
It looks like outside of FreeCAD, which is timing out on main regardless of these changes, performance seems fine.


/** Holds if the file opened at `fo` is closed. */
predicate fileIsClosed(FileOpen fo) { exists(FileClose fc | fileLocalFlow(fo, fc)) }

/** Holds if the file opened at `fo` is returned to the caller. This makes the caller responsible for closing the file. */
predicate fileIsReturned(FileOpen fo) {
exists(Return ret, Expr retVal |
(
retVal = ret.getValue()
or
retVal = ret.getValue().(List).getAnElt()
or
retVal = ret.getValue().(Tuple).getAnElt()
) and
fileLocalFlow(fo, DataFlow::exprNode(retVal))
)
}

/** Holds if the file opened at `fo` is stored in a field. We assume that another method is then responsible for closing the file. */
predicate fileIsStoredInField(FileOpen fo) {
exists(DataFlow::AttrWrite aw | fileLocalFlow(fo, aw.getValue()))
}

/** Holds if the file opened at `fo` is not closed, and is expected to be closed. */
predicate fileNotClosed(FileOpen fo) {
not fileIsClosed(fo) and
not fileIsReturned(fo) and
not fileIsStoredInField(fo)
}

predicate fileMayNotBeClosedOnException(FileOpen fo, DataFlow::Node raises) {
fileIsClosed(fo) and
exists(DataFlow::CfgNode fileRaised |
mayRaiseWithFile(fileRaised, raises) and
fileLocalFlow(fo, fileRaised) and
not exists(FileClose fc |
fileLocalFlow(fo, fc) and
fc.guardsExceptions(raises)
)
)
}
6 changes: 5 additions & 1 deletion python/ql/src/Resources/FileOpen.qll
Original file line number Diff line number Diff line change
@@ -1,4 +1,8 @@
/** Contains predicates concerning when and where files are opened and closed. */
/**
* DEPRECATED: Use FileNotAlwaysClosedQuery instead.
* Contains predicates concerning when and where files are opened and closed.
*/
deprecated module;

import python
import semmle.python.pointsto.Filters
Expand Down
17 changes: 17 additions & 0 deletions python/ql/src/Resources/examples/FileNotAlwaysClosed.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
def bad():
f = open("filename", "w")
f.write("could raise exception") # BAD: This call could raise an exception, leading to the file not being closed.
f.close()


def good1():
with open("filename", "w") as f:
f.write("always closed") # GOOD: The `with` statement ensures the file is always closed.

def good2():
f = open("filename", "w")
try:
f.write("always closed")
finally:
f.close() # GOOD: The `finally` block always ensures the file is closed.

Loading