Skip to content
This repository has been archived by the owner on Jul 13, 2024. It is now read-only.

Phase08 ef core #8

Open
wants to merge 18 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
464 changes: 464 additions & 0 deletions .gitignore

Large diffs are not rendered by default.

3 changes: 3 additions & 0 deletions .vscode/settings.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
{
"java.configuration.updateBuildConfiguration": "interactive"
}
27 changes: 27 additions & 0 deletions phase01_search/pom.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>

<groupId>com.example</groupId>
<artifactId>phase01_search</artifactId>
<version>1.0-SNAPSHOT</version>

<properties>
<maven.compiler.source>1.8</maven.compiler.source>
<maven.compiler.target>1.8</maven.compiler.target>
</properties>

<dependencies>
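<!-- Lombok: compile-time annotation processor that generates the @Getter accessors; "provided" scope keeps it off the runtime classpath -->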
<dependency>
<groupId>org.projectlombok</groupId>
<artifactId>lombok</artifactId>
<version>1.18.30</version>
<scope>provided</scope>
</dependency>
</dependencies>


</project>
17 changes: 17 additions & 0 deletions phase01_search/src/main/java/com/example/EntryStructure.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
package com.example;

import java.util.HashSet;
import java.util.Set;
import lombok.Getter;

public class EntryStructure {
@Getter
private String word;
@Getter
private Set<String> fileNames = new HashSet<String>();

public EntryStructure(String word, Set<String> fileNames) {
this.word = word;
this.fileNames = fileNames;
}
}
28 changes: 28 additions & 0 deletions phase01_search/src/main/java/com/example/FileReader.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
package com.example;

import java.nio.file.*;
import java.util.ArrayList;

public class FileReader {
public static ArrayList<String> getFileNames(String path){
ArrayList<String> fileNames = new ArrayList<String>();
Path folder = Paths.get(path);

if (Files.isDirectory(folder)) {
try {
DirectoryStream<Path> directoryStream = Files.newDirectoryStream(folder);
for (Path filePath : directoryStream) {

fileNames.add(String.valueOf(filePath.getFileName()));
}
directoryStream.close();
} catch (Exception e) {
e.printStackTrace();
}
} else {
System.out.println("Specified path is not a directory.");
}

return fileNames;
}
}
122 changes: 122 additions & 0 deletions phase01_search/src/main/java/com/example/Main.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
package com.example;

import java.io.File;
import java.io.FileNotFoundException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Scanner;
import java.util.Set;
import java.util.Map;

/**
 * Command-line entry point: builds an inverted index over the files of a folder and
 * answers one search query against it.
 *
 * <p>Query syntax (tokens separated by single spaces):
 * <ul>
 *   <li>{@code word}  - required: every such word must occur in a matching file;</li>
 *   <li>{@code +word} - optional group: at least one of these must occur;</li>
 *   <li>{@code -word} - forbidden: files containing it are excluded.</li>
 * </ul>
 */
public class Main {

    /**
     * Looks up a word in the inverted index.
     *
     * @param word          exact word to look up
     * @param invertedIndex map from word to the names of the files containing it
     * @return the file names mapped to {@code word}, or an empty set if the word is absent
     */
    public static Set<String> searchWord(String word, Map<String, Set<String>> invertedIndex) {
        return invertedIndex.getOrDefault(word, Collections.emptySet());
    }

    /**
     * Reads the data folder path and the query from standard input, then prints the names
     * of the matching files.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        final String directoryPath;
        final String userInput;

        // Getting directory path and user input
        try (Scanner console = new Scanner(System.in)) {
            System.out.print("Enter Data Folder Path: ");
            directoryPath = console.nextLine();

            System.out.print("Enter Search Query: ");
            userInput = console.nextLine();
        }

        Map<String, Set<String>> invertedIndex = buildInvertedIndex(directoryPath);

        // Categorizing query words into required, optional (+) and forbidden (-) groups
        ArrayList<String> necessaryWords = new ArrayList<String>();
        ArrayList<String> orWords = new ArrayList<String>();
        ArrayList<String> notWords = new ArrayList<String>();

        for (String token : userInput.split(" ")) {
            if (token.startsWith("+")) {
                // Strip only the prefix so words that themselves contain '+' stay intact
                addIfNotEmpty(orWords, token.substring(1));
            } else if (token.startsWith("-")) {
                addIfNotEmpty(notWords, token.substring(1));
            } else {
                addIfNotEmpty(necessaryWords, token);
            }
        }

        Set<String> necessaryFiles = intersectionOf(necessaryWords, invertedIndex);
        Set<String> orFiles = unionOf(orWords, invertedIndex);
        Set<String> forbiddenFiles = unionOf(notWords, invertedIndex);

        // A group only filters the result when the query actually contains words of that
        // group; otherwise a query without '+' words would always produce no results.
        Set<String> result;
        if (necessaryWords.isEmpty()) {
            result = new HashSet<String>(orFiles);
        } else {
            result = new HashSet<String>(necessaryFiles);
            if (!orWords.isEmpty()) {
                result.retainAll(orFiles);
            }
        }
        result.removeAll(forbiddenFiles);

        // Printing the result
        System.out.println("Results: ");
        for (String entry : result) {
            System.out.println(entry);
        }
    }

    /** Reads every file listed in the folder and maps each space-separated word to the files containing it. */
    private static Map<String, Set<String>> buildInvertedIndex(String directoryPath) {
        Map<String, Set<String>> invertedIndex = new HashMap<>();

        for (String fileName : FileReader.getFileNames(directoryPath)) {
            // try-with-resources closes the reader even if reading fails part-way
            try (Scanner reader = new Scanner(new File(directoryPath + "/" + fileName))) {
                while (reader.hasNextLine()) {
                    for (String word : reader.nextLine().split(" ")) {
                        // Ignoring empty words (content comparison, not reference identity)
                        if (word.isEmpty()) {
                            continue;
                        }
                        invertedIndex.computeIfAbsent(word, k -> new HashSet<>()).add(fileName);
                    }
                }
            } catch (FileNotFoundException e) {
                System.out.println("An error occurred.");
                e.printStackTrace();
            }
        }
        return invertedIndex;
    }

    /** Appends {@code word} to {@code target} unless it is empty (e.g. produced by repeated spaces). */
    private static void addIfNotEmpty(ArrayList<String> target, String word) {
        if (!word.isEmpty()) {
            target.add(word);
        }
    }

    /** Returns the files that contain every word of {@code words}; empty when {@code words} is empty. */
    private static Set<String> intersectionOf(ArrayList<String> words, Map<String, Set<String>> invertedIndex) {
        Set<String> files = new HashSet<String>();
        boolean first = true;
        for (String word : words) {
            if (first) {
                files.addAll(searchWord(word, invertedIndex));
                first = false;
            } else {
                // Once the intersection is empty it must stay empty; do not restart it
                files.retainAll(searchWord(word, invertedIndex));
            }
        }
        return files;
    }

    /** Returns the files that contain at least one word of {@code words}. */
    private static Set<String> unionOf(ArrayList<String> words, Map<String, Set<String>> invertedIndex) {
        Set<String> files = new HashSet<String>();
        for (String word : words) {
            files.addAll(searchWord(word, invertedIndex));
        }
        return files;
    }
}
27 changes: 27 additions & 0 deletions phase02_clean_code/pom.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>

<groupId>com.example</groupId>
<artifactId>phase02_clean_code</artifactId>
<version>1.0-SNAPSHOT</version>

<properties>
<maven.compiler.source>1.8</maven.compiler.source>
<maven.compiler.target>1.8</maven.compiler.target>
</properties>

<dependencies>
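<!-- Lombok: compile-time annotation processor that generates the @Getter accessors; "provided" scope keeps it off the runtime classpath -->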
<dependency>
<groupId>org.projectlombok</groupId>
<artifactId>lombok</artifactId>
<version>1.18.30</version>
<scope>provided</scope>
</dependency>
</dependencies>


</project>
17 changes: 17 additions & 0 deletions phase02_clean_code/src/main/java/com/example/EntryStructure.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
package com.example;

import java.util.HashSet;
import java.util.Set;
import lombok.Getter;

public class EntryStructure {
@Getter
private String word;
@Getter
private Set<String> fileNames = new HashSet<String>();

public EntryStructure(String word, Set<String> fileNames) {
this.word = word;
this.fileNames = fileNames;
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
package com.example;

import java.util.ArrayList;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;

public class FileNameCategorizer {
public static Set<String> categorizer(Map<String, Set<String>> invertedIndex, ArrayList<String> words, boolean isNecessary){
Set<String> finalResult = new HashSet<String>();

if(isNecessary){
for (String word : words) {
Set<String> searchResult = WordFileNameFinder.searchWord(word, invertedIndex);
if(finalResult.isEmpty()){
finalResult.addAll(searchResult);
}
else{
finalResult.retainAll(searchResult);
}
}
}else{
for (String word : words) {
Set<String> searchResult = WordFileNameFinder.searchWord(word, invertedIndex);
finalResult.addAll(searchResult);
}
}

return finalResult;
}
}
28 changes: 28 additions & 0 deletions phase02_clean_code/src/main/java/com/example/FileReader.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
package com.example;

import java.nio.file.*;
import java.util.ArrayList;

public class FileReader {
public static ArrayList<String> getFileNames(String path){
ArrayList<String> fileNames = new ArrayList<String>();
Path folder = Paths.get(path);

if (Files.isDirectory(folder)) {
try {
DirectoryStream<Path> directoryStream = Files.newDirectoryStream(folder);
for (Path filePath : directoryStream) {

fileNames.add(String.valueOf(filePath.getFileName()));
}
directoryStream.close();
} catch (Exception e) {
e.printStackTrace();
}
} else {
System.out.println("Specified path is not a directory.");
}

return fileNames;
}
}
82 changes: 82 additions & 0 deletions phase02_clean_code/src/main/java/com/example/Main.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
package com.example;

import java.io.File;
import java.io.FileNotFoundException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Scanner;
import java.util.Set;
import java.util.Map;

/**
 * Command-line entry point: builds an inverted index over the files of a folder and
 * answers one search query against it, delegating query parsing to
 * {@code WordCategorizer} and result combination to {@code FileNameCategorizer}.
 */
public class Main {

    /**
     * Reads the data folder path and the query from standard input, then prints the names
     * of the matching files.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        final String directoryPath;
        final String userInput;

        // Getting directory path and user input
        try (Scanner console = new Scanner(System.in)) {
            System.out.print("Enter Data Folder Path: ");
            directoryPath = console.nextLine();

            System.out.print("Enter Search Query: ");
            userInput = console.nextLine();
        }

        // Finding the names of all entries in the data directory
        ArrayList<String> fileNamesInFolder = FileReader.getFileNames(directoryPath);

        // Reading the files of the data directory and constructing the inverted index table
        Map<String, Set<String>> invertedIndex = new HashMap<>();

        for (String fileName : fileNamesInFolder) {
            // try-with-resources closes the reader even if reading fails part-way
            try (Scanner reader = new Scanner(new File(directoryPath + "/" + fileName))) {
                while (reader.hasNextLine()) {
                    for (String word : reader.nextLine().split(" ")) {
                        // Ignoring empty words (content comparison, not reference identity)
                        if (word.isEmpty()) {
                            continue;
                        }
                        // Inserting new entry into the inverted index table
                        invertedIndex.computeIfAbsent(word, k -> new HashSet<>()).add(fileName);
                    }
                }
            } catch (FileNotFoundException e) {
                System.out.println("An error occurred.");
                e.printStackTrace();
            }
        }

        // Categorizing query words into necessary, or ("+") and forbidden ("-") groups.
        // NOTE(review): assumes categorizer(userInput, "") yields only the un-prefixed
        // words - confirm against WordCategorizer.
        ArrayList<String> necessaryWords = WordCategorizer.categorizer(userInput, "");
        ArrayList<String> orWords = WordCategorizer.categorizer(userInput, "+");
        ArrayList<String> notWords = WordCategorizer.categorizer(userInput, "-");

        // Resolving each group of words to the files it selects
        Set<String> necessaryFiles = FileNameCategorizer.categorizer(invertedIndex, necessaryWords, true);
        Set<String> orFiles = FileNameCategorizer.categorizer(invertedIndex, orWords, false);
        Set<String> forbiddenFiles = FileNameCategorizer.categorizer(invertedIndex, notWords, false);

        // A group only filters the result when the query actually contains words of that
        // group; otherwise a query without "+" words would always produce no results.
        Set<String> result;
        if (necessaryWords.isEmpty()) {
            result = new HashSet<>(orFiles);
        } else {
            result = new HashSet<>(necessaryFiles);
            if (!orWords.isEmpty()) {
                result.retainAll(orFiles);
            }
        }
        result.removeAll(forbiddenFiles);

        // Printing the result
        System.out.println("Results: ");
        for (String entry : result) {
            System.out.println(entry);
        }
    }
}
Loading