Skip site navigation (1) Skip section navigation (2)

Peripheral Links

Header And Logo

PostgreSQL
| The world's most advanced open source database.

Site Navigation

Search archives
  Advanced Search

Removing unreferenced files


  • From: Bruce Momjian <pgman(at)candle(dot)pha(dot)pa(dot)us>
  • To: PostgreSQL-patches <pgsql-patches(at)postgresql(dot)org>
  • Subject: Removing unreferenced files
  • Date: Thu, 8 Jun 2006 11:08:05 -0400 (EDT)
  • Message-id: <200606081508.k58F85m29270@candle.pha.pa.us> <text/plain>

Here is a cleaned-up version of the unreferenced-file patch that was
discussed extensively in May of 2005.  I want to get it into the
archives in case someone else wants to work on it.

Here is a reference to the work still needed on the patch:

	http://archives.postgresql.org/pgsql-patches/2005-05/msg00024.php

-- 
  Bruce Momjian   http://candle.pha.pa.us
  EnterpriseDB    http://www.enterprisedb.com

  + If your life is a hard drive, Christ can be your backup. +
/*-------------------------------------------------------------------------
 *
 *	checkfiles.c
 *	  check for stale relation files during crash recovery
 *
 *	If a backend crashes while in a transaction that has created or
 *	deleted a relfilenode, a stale file can be left over in the data
 *	directory. This file contains routines to clean up those stale
 *	files on recovery.
 *
 *	This adds a 17% increase in startup cost for 100 empty databases.  bjm
 *	One optimization would be to create a 'dirty' file on a postmaster recovery
 *	and remove the dirty flag only when a clean startup detects no unreferenced
 *	files, and use the 'dirty' flag to determine if we should run this on
 *	a clean startup.
 *
 * $PostgreSQL: pgsql/src/backend/utils/init/checkfiles.c,v 1.2 2005/05/05 22:18:27 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
#include "postgres.h"

#include <errno.h>

#include "access/heapam.h"
#include "access/relscan.h"
#include "access/skey.h"
#include "catalog/catalog.h"
#include "catalog/pg_tablespace.h"
#include "miscadmin.h"
#include "storage/fd.h"
#include "utils/flatfiles.h"
#include "utils/fmgroids.h"
#include "utils/resowner.h"


/*
 * Forward declarations for the file-local scanners defined below:
 * one works on a single database directory within a tablespace, the
 * other on a whole tablespace.
 */
static void CheckStaleRelFilesFrom(Oid tablespaceoid, Oid dboid);
static void CheckStaleRelFilesFromTablespace(Oid tablespaceoid);

/*
 * AllocateDirChecked
 *
 * Wrapper around AllocateDir(): reports an ERROR naming the directory when
 * AllocateDir() returns NULL, otherwise hands back the open directory handle.
 */
static DIR *
AllocateDirChecked(char *path)
{
	DIR		   *dir;

	dir = AllocateDir(path);
	if (!dir)
	{
		ereport(ERROR,
				(errcode_for_file_access(),
				 errmsg("could not open directory \"%s\": %m", path)));
	}

	return dir;
}

/*
 * Scan through all tablespaces for relations left over
 * by aborted transactions.
 */
void
CheckStaleRelFiles(void)
{
	DIR		   *dirdesc;
	struct dirent *de;
	char	   *path;
	int			pathlen;

	pathlen = strlen(DataDir) + 11 + 1;
	path = (char *) palloc(pathlen);
	snprintf(path, pathlen, "%s/pg_tblspc/", DataDir);
	dirdesc = AllocateDirChecked(path);
	while ((de = readdir(dirdesc)) != NULL)
	{
		char	   *invalid;
		Oid			tablespaceoid;

		/* Check that the directory name looks like valid tablespace link.	*/
		tablespaceoid = (Oid) strtol(de->d_name, &invalid, 10);
		if (invalid[0] == '\0')
			CheckStaleRelFilesFromTablespace(tablespaceoid);
	}
	FreeDir(dirdesc);
	pfree(path);

	CheckStaleRelFilesFromTablespace(DEFAULTTABLESPACE_OID);
}

/*
 * CheckStaleRelFilesFromTablespace
 *
 * Scan a specific tablespace for stale relations.
 *
 * Each entry of the tablespace directory whose name is a valid decimal OID
 * is taken to be a database directory and handed to CheckStaleRelFilesFrom().
 * An ERROR is reported (by AllocateDirChecked) if the tablespace directory
 * cannot be opened.
 */
static void
CheckStaleRelFilesFromTablespace(Oid tablespaceoid)
{
	DIR		   *dirdesc;
	struct dirent *de;
	char	   *path;

	path = GetTablespacePath(tablespaceoid);

	dirdesc = AllocateDirChecked(path);
	while ((de = readdir(dirdesc)) != NULL)
	{
		char	   *endptr;
		unsigned long value;

		/*
		 * Database directories are named by OID in plain decimal.  Checking
		 * the first character here keeps strtoul() from skipping leading
		 * whitespace or accepting a sign, and also filters "." and "..".
		 */
		if (de->d_name[0] < '0' || de->d_name[0] > '9')
			continue;

		/*
		 * OIDs are unsigned, so parse with strtoul(); strtol() would clamp
		 * names above LONG_MAX where long is 32 bits wide.  Skip names with
		 * trailing junk or a value that does not fit in an Oid.
		 */
		errno = 0;
		value = strtoul(de->d_name, &endptr, 10);
		if (endptr[0] != '\0' || errno == ERANGE ||
			value != (unsigned long) (Oid) value)
			continue;

		CheckStaleRelFilesFrom(tablespaceoid, (Oid) value);
	}
	FreeDir(dirdesc);
	pfree(path);
}

/*
 * CheckStaleRelFilesFrom
 *
 * Scan a specific database in a specific tablespace for stale relations.
 *
 * First, pg_class for the database is opened, and the relfilenodes of all
 * relations mentioned there are stored in a hash table.
 *
 * Then the directory is scanned. Every file in the directory whose name is
 * a plain decimal OID and that is not found in pg_class (the hash table) is
 * logged.  Nothing is deleted here.
 *
 * NOTE(review): pg_class is opened using the tablespace being scanned
 * (rnode.spcNode = tablespaceoid).  Presumably that is only correct when
 * this tablespace is the database's default tablespace -- confirm before
 * relying on the output for databases spread over several tablespaces.
 */
static void
CheckStaleRelFilesFrom(Oid tablespaceoid, Oid dboid)
{
	DIR		   *dirdesc;
	struct dirent *de;
	HASHCTL		hashctl;
	HTAB	   *relfilenodeHash;
	RelFileNode rnode;
	char	   *path;

	/*
	 * The entry contents is not used for anything, we just check if an oid is
	 * in the hash table or not.
	 */
	hashctl.keysize = sizeof(Oid);
	hashctl.entrysize = sizeof(Oid);
	hashctl.hash = tag_hash;
	relfilenodeHash = hash_create("relfilenodeHash", 100, &hashctl,
								  HASH_FUNCTION | HASH_ELEM);

	/* Read all relfilenodes from pg_class into the hash table */
	{
		ResourceOwner owner,
					oldowner;
		Relation	rel;
		HeapScanDesc scan;
		HeapTuple	tuple;

		/* Need a resowner to keep the heapam and buffer code happy */
		owner = ResourceOwnerCreate(NULL, "CheckStaleRelFiles");
		oldowner = CurrentResourceOwner;
		CurrentResourceOwner = owner;

		rnode.spcNode = tablespaceoid;
		rnode.dbNode = dboid;
		rnode.relNode = RelationRelationId;
		rel = XLogOpenRelation(true, 0, rnode);

		scan = heap_beginscan(rel, SnapshotNow, 0, NULL);
		while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
		{
			Form_pg_class classform = (Form_pg_class) GETSTRUCT(tuple);

			hash_search(relfilenodeHash, &classform->relfilenode,
						HASH_ENTER, NULL);
		}
		heap_endscan(scan);

		XLogCloseRelation(rnode);

		/* Put back the previous resource owner before dropping ours */
		CurrentResourceOwner = oldowner;
		ResourceOwnerDelete(owner);
	}

	/* Scan the directory */
	path = GetDatabasePath(dboid, tablespaceoid);

	dirdesc = AllocateDirChecked(path);
	while ((de = readdir(dirdesc)) != NULL)
	{
		char	   *endptr;
		unsigned long value;
		Oid			relfilenode;
		char	   *filepath;

		/*
		 * Only names that are a plain decimal number are checked.  Testing
		 * the first character here keeps strtoul() from skipping leading
		 * whitespace or accepting a sign, and also filters "." and "..".
		 */
		if (de->d_name[0] < '0' || de->d_name[0] > '9')
			continue;

		/*
		 * relfilenodes are unsigned, so parse with strtoul().  With strtol()
		 * a name above LONG_MAX is clamped where long is 32 bits wide, and
		 * a live file would then be looked up under the wrong number and
		 * reported as stale.  Skip names with trailing junk or a value that
		 * does not fit in an Oid.
		 */
		errno = 0;
		value = strtoul(de->d_name, &endptr, 10);
		if (endptr[0] != '\0' || errno == ERANGE ||
			value != (unsigned long) (Oid) value)
			continue;
		relfilenode = (Oid) value;

		/* Filename was a valid number, check if pg_class knows about it */
		if (hash_search(relfilenodeHash, &relfilenode,
						HASH_FIND, NULL) != NULL)
			continue;

		rnode.spcNode = tablespaceoid;
		rnode.dbNode = dboid;
		rnode.relNode = relfilenode;

		filepath = relpath(rnode);

		/*
		 * No file-access call has failed at this point, so
		 * errcode_for_file_access() is deliberately not used: it would
		 * derive the SQLSTATE from an unrelated errno value.
		 */
		ereport(LOG,
				(errmsg("table or index file \"%s\" is stale and can safely be removed",
						filepath)));
		pfree(filepath);
	}
	FreeDir(dirdesc);
	pfree(path);
	hash_destroy(relfilenodeHash);
}
Index: doc/src/sgml/maintenance.sgml
===================================================================
RCS file: /cvsroot/pgsql/doc/src/sgml/maintenance.sgml,v
retrieving revision 1.41
retrieving revision 1.42
diff -c -r1.41 -r1.42
*** doc/src/sgml/maintenance.sgml	20 Feb 2005 02:21:26 -0000	1.41
--- doc/src/sgml/maintenance.sgml	2 May 2005 18:26:52 -0000	1.42
***************
*** 474,479 ****
--- 474,496 ----
    </para>
   </sect1>
  
+  <sect1 id="check-files-after-crash">
+   <title>Check files after crash</title>
+ 
+   <indexterm zone="check-files-after-crash">
+    <primary>stale file</primary>
+   </indexterm>
+ 
+   <para>
+    <productname>PostgreSQL</productname> recovers automatically after crash
+    using the write-ahead log (see <xref linkend="wal">) and no manual 
+    operations are normally needed. However, if there was a transaction running 
+    when the crash occurred that created or dropped a relation, the 
+    transaction might have left a stale file in the data directory. If this 
+    happens, you will get a notice in the log file stating which files can be 
+    deleted.
+   </para>
+  </sect1>
  
   <sect1 id="logfile-maintenance">
    <title>Log File Maintenance</title>
Index: src/backend/access/transam/xlog.c
===================================================================
RCS file: /cvsroot/pgsql/src/backend/access/transam/xlog.c,v
retrieving revision 1.189
retrieving revision 1.190
diff -c -r1.189 -r1.190
*** src/backend/access/transam/xlog.c	28 Apr 2005 21:47:10 -0000	1.189
--- src/backend/access/transam/xlog.c	2 May 2005 18:26:52 -0000	1.190
***************
*** 43,48 ****
--- 43,49 ----
  #include "utils/builtins.h"
  #include "utils/guc.h"
  #include "utils/relcache.h"
+ #include "utils/flatfiles.h"
  
  
  /*
***************
*** 4525,4530 ****
--- 4526,4533 ----
  
  		CreateCheckPoint(true, true);
  
+ 		CheckStaleRelFiles();
+ 
  		/*
  		 * Close down recovery environment
  		 */
***************
*** 4536,4541 ****
--- 4539,4550 ----
  		 */
  		remove_backup_label();
  	}
+ 	else
+ 	{
+ 		XLogInitRelationCache();
+ 		CheckStaleRelFiles();
+ 		XLogCloseRelationCache();
+ 	}
  
  	/*
  	 * Preallocate additional log files, if wanted.
Index: src/backend/catalog/catalog.c
===================================================================
RCS file: /cvsroot/pgsql/src/backend/catalog/catalog.c,v
retrieving revision 1.59
retrieving revision 1.60
diff -c -r1.59 -r1.60
*** src/backend/catalog/catalog.c	14 Apr 2005 20:03:23 -0000	1.59
--- src/backend/catalog/catalog.c	2 May 2005 18:26:53 -0000	1.60
***************
*** 106,111 ****
--- 106,144 ----
  	return path;
  }
  
+ /*
+  * GetTablespacePath	- construct path to a tablespace symbolic link
+  *
+  * Result is a palloc'd string.
+  *
+  * XXX this must agree with relpath and GetDatabasePath!
+  */
+ char *
+ GetTablespacePath(Oid spcNode)
+ {
+ 	int			pathlen;
+ 	char	   *path;
+ 
+ 	Assert(spcNode != GLOBALTABLESPACE_OID);
+ 
+ 	if (spcNode == DEFAULTTABLESPACE_OID)
+ 	{
+ 		/* The default tablespace is {datadir}/base */
+ 		pathlen = strlen(DataDir) + 5 + 1;
+ 		path = (char *) palloc(pathlen);
+ 		snprintf(path, pathlen, "%s/base",
+ 				 DataDir);
+ 	}
+ 	else
+ 	{
+ 		/* All other tablespaces have symlinks in pg_tblspc */
+ 		pathlen = strlen(DataDir) + 11 + OIDCHARS + 1;
+ 		path = (char *) palloc(pathlen);
+ 		snprintf(path, pathlen, "%s/pg_tblspc/%u",
+ 				 DataDir, spcNode);
+ 	}
+ 	return path;
+ }
  
  /*
   * IsSystemRelation
Index: src/backend/commands/tablespace.c
===================================================================
RCS file: /cvsroot/pgsql/src/backend/commands/tablespace.c,v
retrieving revision 1.17
retrieving revision 1.18
diff -c -r1.17 -r1.18
*** src/backend/commands/tablespace.c	14 Apr 2005 20:03:24 -0000	1.17
--- src/backend/commands/tablespace.c	2 May 2005 18:26:53 -0000	1.18
***************
*** 341,348 ****
  	/*
  	 * All seems well, create the symlink
  	 */
! 	linkloc = (char *) palloc(strlen(DataDir) + 11 + 10 + 1);
! 	sprintf(linkloc, "%s/pg_tblspc/%u", DataDir, tablespaceoid);
  
  	if (symlink(location, linkloc) < 0)
  		ereport(ERROR,
--- 341,347 ----
  	/*
  	 * All seems well, create the symlink
  	 */
! 	linkloc = GetTablespacePath(tablespaceoid);
  
  	if (symlink(location, linkloc) < 0)
  		ereport(ERROR,
***************
*** 495,502 ****
  	char	   *subfile;
  	struct stat st;
  
! 	location = (char *) palloc(strlen(DataDir) + 11 + 10 + 1);
! 	sprintf(location, "%s/pg_tblspc/%u", DataDir, tablespaceoid);
  
  	/*
  	 * Check if the tablespace still contains any files.  We try to rmdir
--- 494,500 ----
  	char	   *subfile;
  	struct stat st;
  
! 	location = GetTablespacePath(tablespaceoid);
  
  	/*
  	 * Check if the tablespace still contains any files.  We try to rmdir
***************
*** 1036,1043 ****
  		set_short_version(location);
  
  		/* Create the symlink if not already present */
! 		linkloc = (char *) palloc(strlen(DataDir) + 11 + 10 + 1);
! 		sprintf(linkloc, "%s/pg_tblspc/%u", DataDir, xlrec->ts_id);
  
  		if (symlink(location, linkloc) < 0)
  		{
--- 1034,1040 ----
  		set_short_version(location);
  
  		/* Create the symlink if not already present */
! 		linkloc = GetTablespacePath(xlrec->ts_id);
  
  		if (symlink(location, linkloc) < 0)
  		{
Index: src/backend/utils/adt/misc.c
===================================================================
RCS file: /cvsroot/pgsql/src/backend/utils/adt/misc.c,v
retrieving revision 1.40
retrieving revision 1.41
diff -c -r1.40 -r1.41
*** src/backend/utils/adt/misc.c	31 Dec 2004 22:01:22 -0000	1.40
--- src/backend/utils/adt/misc.c	2 May 2005 18:26:53 -0000	1.41
***************
*** 26,31 ****
--- 26,32 ----
  #include "funcapi.h"
  #include "catalog/pg_type.h"
  #include "catalog/pg_tablespace.h"
+ #include "catalog/catalog.h"
  
  #define atooid(x)  ((Oid) strtoul((x), NULL, 10))
  
***************
*** 144,154 ****
  
  		fctx = palloc(sizeof(ts_db_fctx));
  
- 		/*
- 		 * size = path length + tablespace dirname length + 2 dir sep
- 		 * chars + oid + terminator
- 		 */
- 		fctx->location = (char *) palloc(strlen(DataDir) + 11 + 10 + 1);
  		if (tablespaceOid == GLOBALTABLESPACE_OID)
  		{
  			fctx->dirdesc = NULL;
--- 145,150 ----
***************
*** 157,168 ****
  		}
  		else
  		{
! 			if (tablespaceOid == DEFAULTTABLESPACE_OID)
! 				sprintf(fctx->location, "%s/base", DataDir);
! 			else
! 				sprintf(fctx->location, "%s/pg_tblspc/%u", DataDir,
! 						tablespaceOid);
! 
  			fctx->dirdesc = AllocateDir(fctx->location);
  
  			if (!fctx->dirdesc)
--- 153,159 ----
  		}
  		else
  		{
! 			fctx->location = GetTablespacePath(tablespaceOid);
  			fctx->dirdesc = AllocateDir(fctx->location);
  
  			if (!fctx->dirdesc)
Index: src/backend/utils/init/Makefile
===================================================================
RCS file: /cvsroot/pgsql/src/backend/utils/init/Makefile,v
retrieving revision 1.18
retrieving revision 1.19
diff -c -r1.18 -r1.19
*** src/backend/utils/init/Makefile	20 Feb 2005 02:22:00 -0000	1.18
--- src/backend/utils/init/Makefile	2 May 2005 18:26:53 -0000	1.19
***************
*** 12,18 ****
  top_builddir = ../../../..
  include $(top_builddir)/src/Makefile.global
  
! OBJS = flatfiles.o globals.o miscinit.o postinit.o
  
  all: SUBSYS.o
  
--- 12,18 ----
  top_builddir = ../../../..
  include $(top_builddir)/src/Makefile.global
  
! OBJS = flatfiles.o globals.o miscinit.o postinit.o checkfiles.o
  
  all: SUBSYS.o
  
Index: src/include/catalog/catalog.h
===================================================================
RCS file: /cvsroot/pgsql/src/include/catalog/catalog.h,v
retrieving revision 1.30
retrieving revision 1.31
diff -c -r1.30 -r1.31
*** src/include/catalog/catalog.h	31 Dec 2004 22:03:24 -0000	1.30
--- src/include/catalog/catalog.h	2 May 2005 18:26:54 -0000	1.31
***************
*** 19,24 ****
--- 19,25 ----
  
  extern char *relpath(RelFileNode rnode);
  extern char *GetDatabasePath(Oid dbNode, Oid spcNode);
+ extern char *GetTablespacePath(Oid spcNode);
  
  extern bool IsSystemRelation(Relation relation);
  extern bool IsToastRelation(Relation relation);
Index: src/include/utils/flatfiles.h
===================================================================
RCS file: /cvsroot/pgsql/src/include/utils/flatfiles.h,v
retrieving revision 1.1
retrieving revision 1.2
diff -c -r1.1 -r1.2
*** src/include/utils/flatfiles.h	20 Feb 2005 02:22:07 -0000	1.1
--- src/include/utils/flatfiles.h	2 May 2005 18:26:54 -0000	1.2
***************
*** 30,33 ****
--- 30,36 ----
  
  extern Datum flatfile_update_trigger(PG_FUNCTION_ARGS);
  
+ /* from checkfiles.c */
+ extern void CheckStaleRelFiles(void);
+ 
  #endif   /* FLATFILES_H */


Home | Main Index | Thread Index

Privacy Policy | About PostgreSQL
Copyright © 1996 – 2012 PostgreSQL Global Development Group