--- ie_imp_UTF8.cpp.orig	Wed Feb  7 08:55:08 2001
+++ ie_imp_UTF8.cpp	Tue Apr 10 23:08:02 2001
@@ -308,8 +308,74 @@
 
 bool IE_Imp_UTF8::RecognizeContents(const char * szBuf, UT_uint32 iNumbytes)
 {
-	// TODO: Not yet written
-	return(false);
+	// Guess whether the buffer holds UTF-8 text by walking it one encoded
+	// character at a time, accepting the 1- to 6-byte forms handled below.
+	//
+	// Returns true only when at least one well-formed continuation byte was
+	// seen and no malformed sequence was found, so an empty buffer or pure
+	// 7-bit ASCII yields false.  A sequence cut short by the end of the
+	// buffer is not treated as an error.
+	bool bSuccess = false;
+	const unsigned char *p = reinterpret_cast<const unsigned char *>(szBuf);
+	const unsigned char * const pEnd = p + iNumbytes;
+
+	while (p < pEnd)
+	{
+		int len;
+
+		if ((*p & 0x80) == 0)					// ASCII
+		{
+			++p;
+			continue;
+		}
+		else if ((*p & 0xc0) == 0x80)			// stray continuation byte - not UTF-8
+		{
+			return false;
+		}
+		else if (*p == 0xfe || *p == 0xff)		// BOM markers?  RFC2279 says illegal
+		{
+			// Skipped rather than rejected; these bytes do not influence the result.
+			UT_DEBUGMSG(("  BOM?\n"));
+			++p;
+			continue;
+		}
+		else if ((*p & 0xfe) == 0xfc)			// lead byte in 6-byte sequence
+			len = 6;
+		else if ((*p & 0xfc) == 0xf8)			// lead byte in 5-byte sequence
+			len = 5;
+		else if ((*p & 0xf8) == 0xf0)			// lead byte in 4-byte sequence
+			len = 4;
+		else if ((*p & 0xf0) == 0xe0)			// lead byte in 3-byte sequence
+			len = 3;
+		else if ((*p & 0xe0) == 0xc0)			// lead byte in 2-byte sequence
+			len = 2;
+		else
+		{
+			// the above code covers all cases - if we reach here the logic is wrong
+			UT_ASSERT(UT_SHOULD_NOT_HAPPEN);
+			return false;
+		}
+
+		// The lead byte must be followed by len - 1 continuation bytes (10xxxxxx).
+		while (--len)
+		{
+			++p;
+			if (p >= pEnd)
+			{
+				// Sequence truncated by the end of the buffer: report what was
+				// seen so far rather than stepping p past the end of szBuf.
+				UT_DEBUGMSG(("  out of data!\n"));
+				return bSuccess;
+			}
+			if ((*p & 0xc0) == 0x80)
+				bSuccess = true;
+			else
+				return false;
+		}
+		++p;
+	}
+
+	return bSuccess;
 }
 
 bool IE_Imp_UTF8::RecognizeSuffix(const char * szSuffix)