
#import "CSV2JSON.h"
#include <limits.h>
/*
bool is_utf8(const char * string) {
    if(!string)
        return 0;
	
    const unsigned char * bytes = (const unsigned char *)string;
    while(*bytes)
    {
        if(     (// ASCII
				 bytes[0] == 0x09 ||
				 bytes[0] == 0x0A ||
				 bytes[0] == 0x0D ||
				 (0x20 <= bytes[0] && bytes[0] <= 0x7E)
				 )
		   ) {
			bytes += 1;
			continue;
        }
		
        if(     (// non-overlong 2-byte
				 (0xC2 <= bytes[0] && bytes[0] <= 0xDF) &&
				 (0x80 <= bytes[1] && bytes[1] <= 0xBF)
				 )
		   ) {
			bytes += 2;
			continue;
        }
		
        if(     (// excluding overlongs
				 bytes[0] == 0xE0 &&
				 (0xA0 <= bytes[1] && bytes[1] <= 0xBF) &&
				 (0x80 <= bytes[2] && bytes[2] <= 0xBF)
				 ) ||
		   (// straight 3-byte
			((0xE1 <= bytes[0] && bytes[0] <= 0xEC) ||
			 bytes[0] == 0xEE ||
			 bytes[0] == 0xEF) &&
			(0x80 <= bytes[1] && bytes[1] <= 0xBF) &&
			(0x80 <= bytes[2] && bytes[2] <= 0xBF)
			) ||
		   (// excluding surrogates
			bytes[0] == 0xED &&
			(0x80 <= bytes[1] && bytes[1] <= 0x9F) &&
			(0x80 <= bytes[2] && bytes[2] <= 0xBF)
			)
		   ) {
			bytes += 3;
			continue;
        }
		
        if(     (// planes 1-3
				 bytes[0] == 0xF0 &&
				 (0x90 <= bytes[1] && bytes[1] <= 0xBF) &&
				 (0x80 <= bytes[2] && bytes[2] <= 0xBF) &&
				 (0x80 <= bytes[3] && bytes[3] <= 0xBF)
				 ) ||
		   (// planes 4-15
			(0xF1 <= bytes[0] && bytes[0] <= 0xF3) &&
			(0x80 <= bytes[1] && bytes[1] <= 0xBF) &&
			(0x80 <= bytes[2] && bytes[2] <= 0xBF) &&
			(0x80 <= bytes[3] && bytes[3] <= 0xBF)
			) ||
		   (// plane 16
			bytes[0] == 0xF4 &&
			(0x80 <= bytes[1] && bytes[1] <= 0x8F) &&
			(0x80 <= bytes[2] && bytes[2] <= 0xBF) &&
			(0x80 <= bytes[3] && bytes[3] <= 0xBF)
			)
		   ) {
			bytes += 4;
			continue;
        }
		
        return 0;
    }
	
    return 1;
}

NSString * convertISO8859ToUTF8(NSString * input) {
	
	NSMutableString * output;

	for (int i = 0, c = input.length; i < c; i++) {
		unichar ch = [input characterAtIndex:i]; // assume that code points above 0xff are impossible since latin-1 is 8-bit
		
		if(ch < 0x80) {
			[output appendFormat:@"%c", ch];
		} else {
			[output appendFormat:@"%c", (0xc0 | (ch & 0xc0) >> 6)]; // first byte, simplified since our range is only 8-bits
			[output appendFormat:@"%c", (0x80 | (ch & 0x3f))];
		}
	}
	
	
	if (!output.length) {
		NSLog(@"Conversion failed.\n");
		return @"{}";
	}
	
	return output;
	
}*/

// Reports whether the character just consumed (ptr[-1]) together with the
// upcoming character(s) forms an escaped double quote.
//
// ptr      - points one past the character just consumed; ptr[-1] must be readable.
// inQuotes - whether the scanner is currently inside a quoted field.
//
// Recognised forms: backslash + quote (\"), a doubled quote ("") while inside
// a quoted field, and a tripled quote (""") anywhere.
static bool escapedQuotesAreFollowing(const char * ptr, bool inQuotes) {
	switch (ptr[-1]) {
		case '\\':
			// \" is an escape regardless of quoting state
			return ptr[0] == '"';

		case '"':
			if (ptr[0] != '"') return false;
			// "" only counts inside a quoted field; outside, a third quote is required
			return inQuotes || ptr[1] == '"';

		default:
			return false;
	}
}

// Returns the number of characters, counted from ptr[-1], that make up the
// escaped-quote sequence at this position, or 1 when there is none.
//
// ptr      - points one past the character just consumed; ptr[-1] must be readable.
// inQuotes - whether the scanner is currently inside a quoted field.
//
// Results: 2 for \" and for "" inside a quoted field, 3 for """, otherwise 1.
static int lengthOfEscapedQuotes(const char * ptr, bool inQuotes) {
	switch (ptr[-1]) {
		case '\\':
			return ptr[0] == '"' ? 2 : 1;

		case '"':
			if (ptr[0] != '"') return 1;
			if (inQuotes) return 2;
			return ptr[1] == '"' ? 3 : 1;

		default:
			return 1;
	}
}

// Converts CSV text into a JSON string using CSVParser.
//
// csvString - the CSV text to convert.
//
// Returns the UTF-8 decoded contents of the parser's jsonData when parsing
// succeeds, or the literal @"{}" when parsing fails or no JSON text could be
// produced. Never returns nil.
NSString * CSV2JSON(NSString * csvString) {

	// The input is passed to the parser as-is; the former "encode to UTF-8 if
	// necessary" step (is_utf8 / convertISO8859ToUTF8) is disabled.

	// parse CSV
	CSVParser * parser = [[CSVParser alloc] init];
	parser.csvString = csvString;

	if ([parser parse]) {
		NSData * jsonData = parser.jsonData;
		if (jsonData) {
			NSString * json = [[NSString alloc] initWithData:jsonData encoding:NSUTF8StringEncoding];
			// Valid JSON text is never empty, so nil/empty means "no usable result".
			if (json.length) {
				return json;
			}
		}
	}

	return @"{}";

}


@implementation CSVParser {
	NSArray * headerArray;
	NSString * newlineSequence;
	unichar delimiter;
}

// Returns YES when ch is a space, horizontal tab, line feed or carriage return.
- (BOOL)isWhitespace:(unichar)ch {
	switch (ch) {
		case ' ':
		case '\t':
		case '\n':
		case '\r':
			return YES;
		default:
			return NO;
	}
}

// Returns inputString with leading and trailing characters from
// +[NSCharacterSet whitespaceCharacterSet] removed.
- (NSString *)trim:(NSString *)inputString {
	NSCharacterSet * blanks = [NSCharacterSet whitespaceCharacterSet];
	NSString * trimmed = [inputString stringByTrimmingCharactersInSet:blanks];
	return trimmed;
}

// Returns the header array stored by the most recent -parse
// (nil if nothing has been parsed yet).
- (NSArray *)columnNames {
	return self->headerArray;
}

// Guesses the field delimiter by counting commas, semicolons and tabs in the
// first two lines of csvString (lines are split on newlineSequence, which
// must already be set).
//
// Returns ',', ';' or '\t', or 0 when the text contains fewer than two
// newline sequences or none of the candidates occurs in the first line.
//
// Decision order:
//   1. a candidate that occurs equally often (and at least once) in both
//      lines wins, tried in the order tab, semicolon, comma;
//   2. otherwise the counts of the first line alone decide.
- (unichar)determineDelimiter {

	NSUInteger pos_nl1 = [self.csvString rangeOfString:newlineSequence].location;
	if (pos_nl1 == NSNotFound)
		return 0;

	NSUInteger line2Start = pos_nl1 + newlineSequence.length;
	NSRange searchRange = NSMakeRange(line2Start, self.csvString.length - line2Start);
	NSUInteger pos_nl2 = [self.csvString rangeOfString:newlineSequence options:0 range:searchRange].location;

	if (pos_nl2 == NSNotFound)
		return 0;

	NSString * line1 = [self.csvString substringToIndex:pos_nl1];
	NSString * line2 = [self.csvString substringWithRange:NSMakeRange(line2Start, pos_nl2 - line2Start)];

	// Tallies indexed as counts[line][candidate].
	enum { CommaIndex, SemicolonIndex, TabIndex, CandidateCount };
	int counts[2][CandidateCount] = { { 0 }, { 0 } };
	NSString * lines[2] = { line1, line2 };

	for (int l = 0; l < 2; l++) {
		NSString * line = lines[l];
		for (NSUInteger i = 0, c = line.length; i < c; i++) {
			switch ([line characterAtIndex:i]) {
				case ',':
					counts[l][CommaIndex]++;
					break;
				case ';':
					counts[l][SemicolonIndex]++;
					break;
				case '\t':
					counts[l][TabIndex]++;
					break;
				default:
					break;
			}
		}
	}

	const int line1Commas = counts[0][CommaIndex],         line2Commas = counts[1][CommaIndex];
	const int line1Semicolons = counts[0][SemicolonIndex], line2Semicolons = counts[1][SemicolonIndex];
	const int line1Tabs = counts[0][TabIndex],             line2Tabs = counts[1][TabIndex];

	// 1. Same non-zero count on both lines.
	if (line1Tabs && line1Tabs == line2Tabs)
		return '\t';

	if (line1Semicolons && line1Semicolons == line2Semicolons)
		return ';';

	if (line1Commas && line1Commas == line2Commas)
		return ',';

	// 2. Fall back to the first line only.
	// NOTE(review): comma wins as soon as it outnumbers EITHER other candidate,
	// even if the third one is more frequent - kept as-is, confirm this is intended.
	if (line1Commas > line1Tabs) return ',';
	if (line1Commas > line1Semicolons) return ',';
	if (line1Tabs) return '\t';
	// Fixed: this branch used to return a TAB although semicolons were counted.
	if (line1Semicolons) return ';';

	return 0;
}

// Stores in newlineSequence the line terminator used by csvString:
// "\r\n" if that pair occurs anywhere, else "\r" if a carriage return
// occurs, else "\n".
- (void)determineNewlineSequence {

	// Ordered by precedence; the first terminator found in the text wins.
	for (NSString * candidate in @[@"\r\n", @"\r"]) {
		if ([self.csvString rangeOfString:candidate].location != NSNotFound) {
			newlineSequence = candidate;
			return;
		}
	}

	newlineSequence = @"\n";

}


// Reports whether the character just consumed (ptr[-1]) starts the configured
// newlineSequence. For "\r\n" the upcoming character (ptr[0]) must be '\n' as
// well. ptr points one past the consumed character.
- (BOOL)newlineSequenceIsFollowing:(const char *) ptr {
	if ([newlineSequence isEqualToString:@"\r\n"])
		return ptr[-1] == '\r' && ptr[0] == '\n';

	if ([newlineSequence isEqualToString:@"\n"])
		return ptr[-1] == '\n';

	if ([newlineSequence isEqualToString:@"\r"])
		return ptr[-1] == '\r';

	return NO;
}

// Returns the header name for the column at index, or the index rendered as
// a decimal string when the header has no entry for that column.
- (NSString *)nameForColumnAtIndex:(NSUInteger)index {

	BOOL hasHeaderName = index < headerArray.count;
	return hasHeaderName
		? headerArray[index]
		: [[NSNumber numberWithInteger:index] stringValue];
}

// Splits the header line into column names using the current delimiter.
//
// line - the first line of the CSV, without its line terminator.
//
// Returns the column names in order (empty array for a nil line). Quoted
// fields may contain the delimiter; \" , "" (inside quotes) and """ yield a
// literal quote; a backslash before the delimiter makes it literal; \\ yields
// one backslash; any other single backslash is dropped. A trailing empty
// column is not added.
//
// NOTE(review): the "escaped delimiter" test only looks at the preceding byte,
// so an escaped backslash directly before a delimiter (\\,) also suppresses
// the split - behavior kept from the original.
- (NSArray *)parseHeader:(NSString *)line {

	NSMutableArray * result = [NSMutableArray array];

	const char * const start = line.UTF8String;
	if (!start)
		return result; // nil line: nothing to scan

	const char * ptr = start;
	// Plain char (not unichar): bytes are appended one at a time below, and
	// taking one byte out of a 2-byte unichar is only correct on little-endian.
	char ch;
	BOOL inQuotes = NO;
	NSMutableData * data = [NSMutableData data];

	while ((ch = *(ptr++))) {

		if (inQuotes) {
			if (escapedQuotesAreFollowing(ptr, true)) {
				[data appendBytes:"\"" length:1];
				ptr += lengthOfEscapedQuotes(ptr, true)-1;
			}
			else if (ch == '"') {
				inQuotes = NO;
			} else {
				[data appendBytes:&ch length:1];
			}
		}
		else {

			if (escapedQuotesAreFollowing(ptr, false)) {
				[data appendBytes:"\"" length:1];
				ptr += lengthOfEscapedQuotes(ptr, false)-1;
			}
			else if (ch == '"') {
				inQuotes = YES;
			}
			else {
				// ptr is one past ch; only look at the byte before ch when there
				// is one, otherwise we would read in front of the buffer.
				BOOL precededByBackslash = (ptr - start) >= 2 && ptr[-2] == '\\';

				if (ch == delimiter && !precededByBackslash) {
					NSString * name = [[NSString alloc] initWithData:data encoding:NSUTF8StringEncoding];
					[result addObject:(name ?: @"")]; // addObject: throws on nil
					data = [NSMutableData data];
				}
				else if (ch == '\\') {
					// "\\" -> one literal backslash; a lone backslash is dropped
					if (*ptr == '\\') {
						[data appendBytes:&ch length:1];
						ptr++;
					}
				}
				else {
					[data appendBytes:&ch length:1];
				}
			}

		}

	}

	if (data.length) {
		NSString * name = [[NSString alloc] initWithData:data encoding:NSUTF8StringEncoding];
		[result addObject:(name ?: @"")];
	}

	return result;
}

// Parses csvString and stores the result, serialized as a JSON array of
// objects (one per data row, keyed by header name), in jsonData.
//
// Steps: detect the newline sequence, detect the delimiter, parse the first
// line as the header, then scan the remaining bytes row by row.
//
// Row scanning rules:
//   - a line whose first character is '#' is a comment and is skipped;
//   - blank lines are skipped;
//   - quoted fields may contain delimiters and newlines;
//   - \" , "" (inside quotes) and """ produce a literal quote;
//   - a backslash before the delimiter makes it literal, \\ produces one
//     backslash, any other single backslash is dropped;
//   - fields beyond the number of header columns are discarded;
//   - the last field of a file without trailing newline is whitespace-trimmed.
//
// Returns YES on success; NO when the CSV is empty, the delimiter cannot be
// determined, or the result cannot be serialized to JSON.
- (BOOL)parse {

	NSMutableArray * result = [NSMutableArray array];

	if (!self.csvString.length) {
		NSLog(@"Error: The CSV is empty.");
		return NO;
	}

	[self determineNewlineSequence];

	if (!newlineSequence.length) {
#ifdef DEBUG
		NSLog(@"Warning: The CSV has no rows.");
#endif
		return NO;
	}

	delimiter = [self determineDelimiter];
	if (!delimiter) {
#ifdef DEBUG
		NSLog(@"Warning: The delimiter couldn't be determined.");
#endif
		return NO;
	}

	// A non-zero delimiter implies at least one newline sequence was found,
	// so the range location below is valid.
	NSString * header = [self.csvString substringToIndex:[self.csvString rangeOfString:newlineSequence].location];

	headerArray = [self parseHeader:header];

	NSUInteger numberOfColumns = headerArray.count;
	NSUInteger currentColumn = 0;
	bool inQuotes = false;

	// A blank line leaves exactly one entry in the row: an empty string stored
	// under the first column's key. Rows are keyed by header name, so look the
	// key up instead of assuming it is "0".
	NSString * firstColumnName = [self nameForColumnAtIndex:0];

	NSMutableDictionary * row = [NSMutableDictionary dictionary];
	// Plain char (not unichar): bytes are appended one at a time below, and
	// taking one byte out of a 2-byte unichar is only correct on little-endian.
	char ch;
	const char * csvCString = self.csvString.UTF8String;
	// Skip the header line and its terminator (the terminator is pure ASCII,
	// so its character count equals its byte count).
	const char * ptr = csvCString + strlen(header.UTF8String) + newlineSequence.length;

	// The first data line may already be a comment.
	bool inComment = (*ptr == '#');

	NSMutableData * data = [NSMutableData data];

	while ((ch = *(ptr++))) {

		if (inComment) {
			if (ch == '\n' || ch == '\r') {
				// Swallow the LF of a CRLF pair so it does not leak into the next field.
				if (ch == '\r' && *ptr == '\n') ptr++;
				// Stay in comment mode when the next line is a comment as well.
				inComment = (*ptr == '#');
			}
		}
		else if (inQuotes) {
			if (escapedQuotesAreFollowing(ptr, true)) {
				[data appendBytes:"\"" length:1];
				ptr += lengthOfEscapedQuotes(ptr, true)-1;
			}
			else if (ch == '"') {
				inQuotes = false;
			} else {
				[data appendBytes:&ch length:1];
			}
		}
		else {

			if (escapedQuotesAreFollowing(ptr, false)) {
				[data appendBytes:"\"" length:1];
				ptr += lengthOfEscapedQuotes(ptr, false)-1;
			}
			else if (ch == '"') {
				inQuotes = true;
			}
			else {
				if ([self newlineSequenceIsFollowing:ptr]) {
					// end of row: store the pending field
					if (currentColumn < numberOfColumns)
						row[[self nameForColumnAtIndex:currentColumn]] = [[NSString alloc] initWithData:data encoding:NSUTF8StringEncoding];
					data = [NSMutableData data];

					// step over the rest of a multi-byte terminator
					ptr += newlineSequence.length-1;

					// skip empty lines
					BOOL isEmptyLine = (row.count == 1 && [row[firstColumnName] isEqualToString:@""]);
					if (!isEmptyLine) {
						[result addObject:row];
					}

					row = [NSMutableDictionary dictionary];
					currentColumn = 0;
					if (*(ptr) == '#') inComment = true;
				}
				// ptr-2 is in bounds here: ptr is at least one byte past the header terminator
				else if (ch == delimiter && *(ptr-2) != '\\') {
					// end of field
					if (currentColumn < numberOfColumns)
						row[[self nameForColumnAtIndex:currentColumn]] = [[NSString alloc] initWithData:data encoding:NSUTF8StringEncoding];
					data = [NSMutableData data];
					currentColumn++;
				}
				else if (ch == '\\') {
					// "\\" -> one literal backslash; a lone backslash is dropped
					if (*ptr == '\\') {
						[data appendBytes:&ch length:1];
						ptr++;
					}
				}
				else {
					[data appendBytes:&ch length:1];
				}
			}

		}

	}

	// Flush what is left when the file does not end with a newline.
	NSString * str = [self trim:[[NSString alloc] initWithData:data encoding:NSUTF8StringEncoding]];

	if (str.length) {
		if (currentColumn < numberOfColumns)
			row[[self nameForColumnAtIndex:currentColumn]] = str;
		[result addObject:row];
		row = [NSMutableDictionary dictionary];
	}

	if (row.count) {
		[result addObject:row];
	}

	NSError * error = nil;
	NSData * json = [NSJSONSerialization dataWithJSONObject:result options:0 error:&error];
	self.jsonData = json;

	if (!json) {
#ifdef DEBUG
		NSLog(@"Warning: The result couldn't be serialized to JSON: %@", error);
#endif
		return NO;
	}

	return YES;
}


@end
