Counting number of inversion in an array (python)

def count_inversion(array):
    (count,sorted_array) = count_inversion_helper(array)
    return count

def count_inversion_helper(array1):
    """"
    counts the number for inversion in one array we assume the inversions
    are related to sorted array from
     0 to n (pep8 tested with 10.0 score)
    :author name: Tanmay Datta
    :param array1: array to count (list type)
    :return: number of inversoin
    :type array1: List(Num)
    """"
    length = len(array1)
    if length <= 1:
        #Trivial case and no inversion
        return (0, array1)
    if length == 2:
        if array1[0] &amp< array1[1]:
            return (0, array1)
        else:
            return (1, [array1[1], array1[0]]) # sorted array
    else:
        half = length/2
        right_array = array1[half:]
        (left_inversion, left_array) = count_inversion_helper(array1[:half])
        (right_inversion, right_array) = count_inversion_helper(array1[half:])
        sorted_array = []
        left_idx = 0
        right_idx = 0
        left_size = len(left_array)
        right_size = len(right_array)
        total_inversions = left_inversion + right_inversion
        while True:
            if right_array[right_idx] < left_array[left_idx]:
                sorted_array.append(right_array[right_idx])
                total_inversions += left_size - left_idx
                right_idx += 1

            else:
                sorted_array.append(left_array[left_idx])
                left_idx += 1
            if left_idx >= left_size:
                sorted_array += right_array[right_idx:]
                break
            if right_idx >= right_size:
                sorted_array += left_array[left_idx:]
                break
        return (total_inversions, sorted_array)

Another funky implementation which uses a “bisect” function already given by python libraries

import bisect

def ms_funky(arr):

sorted_array = sorted(arr)

r = 0

for n in arr:

    pos = bisect.bisect_left(sorted_array,n)

    r += pos

    sorted_array.pop(pos)

return r

Advertisements

Merge sort in python (implementation,testing,re implementation, style fix)

I am taking one online course on algorithms and thought would write a programming tutorial (or at least my thought process) along with implementation of merge-sort algorithm.

————————— merge-sort rules——————————-

If array size is 1 it’s always sorted

If array size is 2 then sorting is trivial and could be done as (if a[1] < a[2] then good otherwise reverse it)

In all other case divide array in 2 parts and perform sort individually.

Once both sided are sorted the merge them back iteratively by checking each element one by one in  the half array

—————————————-First attempt ————————————————-

def merge_sort(arr):

l = len(arr)

if l <= 1:

    return arr

elif l ==2:

if arr[0] < arr[1]:

    return arr

else:

    return [arr[1], arr[0]]

else:

    half = l // 2

    left = merge_sort(arr[:half])

    right = merge_sort(arr[half:])

    sorted_arr = []

    for idx in xrange(0,half):

    if left[0] <= right[0]:

        v = left.pop(0)

    else:

        v = right.pop(0)

    sorted_arr.append(v)

    sorted_arr = sorted_arr + left + right

    return sorted_arr

Looks ok (hmmm!!!!)

As a programmer the most important thing that you need to check a code for is ____ –>

“Correctness”

let’s write some tests

What are good tests ?

1. Checks for “common” values correctness

2. Checks for “extreme” cases correctness

3. Checks for exceptions and correctness

from back of my mind I can think of following cases

test1 = [1,2,3,4,5,6] (Checks what happens when array is already sorted)

test2 = [4,3,1,7,6,5] (checks for the real sorting problem under normal kind of case (even numbers)

test3 = [4,3,1,7,6,5,2]  (checks similar to test 2 along just odd numbers)

test4 = [] (Extreme case when array is empty)

test5 = [4,4,4,4,4] (Extreme case when all elements are same)

test6 =[2,2,1,1,5,3,6,100] (Not extreme but good case having some duplicates)

———————————————Input————————————————-


test1 = [1,2,3,4,5,6]

test2 = [4,3,1,7,6,5]

test3 = [4,3,1,7,6,5,2]

test4 = []

test5 = [4,4,4,4,4]

test6 =[2,2,1,1,5,3,6,100]

print merge_sort(test1)

print merge_sort(test2)

print merge_sort(test3)

print merge_sort(test4)

print merge_sort(test5)

print merge_sort(test6)

——————————————output at first attempt——————————————


[1, 2, 3, 4, 5, 6]
[1, 4, 3, 5, 7, 6]
[1, 2, 4, 3, 5, 6, 7]
[]
[4, 4, 4, 4, 4]
[1, 1, 2, 2, 3, 5, 6, 100]

Hmm so the code has some bug for test2 and test3 😦

what happend?

The code has a logic flaw (instead of a for there should be a while) posting the code without explaining the logical flaw as I think that is quite trivial

def merge_sort(arr):

l = len(arr)

if l <= 1:

return arr

elif l ==2:

if arr[0] < arr[1]:

return arr

else:

return [arr[1], arr[0]]

else:

half = l // 2

left = merge_sort(arr[:half])

right = merge_sort(arr[half:])

sorted_arr = []

while len(left) != 0 and len(right) != 0:

if left[0] <= right[0]:

v = left.pop(0)

else:

v = right.pop(0)

sorted_arr.append(v)

sorted_arr = sorted_arr + left + right

return sorted_arr

——— So all the test passed after this ——

Next step –>

Fix style

Style depends a lot on programmers taste but there are certain things that everyone (at least every python programmer) would appreciate. So I am going to just touch those points

def merge_sort_withstyle(arr):

"""

The program is a style corrected version written by Tanmay for

mergesort

"""

length = len(arr)

sorted_arr = arr if length

return arr if length <= 1

if length <= 1:

return arr

elif length == 2:

return arr if arr[0] < arr[1] else [arr[1], arr[0]]

else:

half = length//2

left = merge_sort_withstyle(arr[:half])

right = merge_sort_withstyle(arr[half:])

while len(left) != 0 and len(right) != 0:

value = left.pop(0) if left[0] < right[0] else right.pop(0)

sorted_arr.append(value)

sorted_arr += left + right

return sorted_arr

Next step (as a mathematician and a really good programmer)

Optimize 

(have fun !! )

Web-Crawler (Python) for getting financial data

*** I have moved this blog to www.tanmaydutta.com

1. Pick website: [I am going to scrap http://finance.yahoo.com/q?s=DAIMX This is a random choice for an Indian equity and I am going to study a couple of more equities.
2. Create a folder to start your project –>

 scrapy startproject yahooFinance 

This should create the following kind of structure
folderTree
We will leave this structure as it is for now.
Next we just need to define some items to scrap from yahoo finance in items.py (which is located in yahooFinance –> yahooFinance –> items.py)
Now, we will write some basic code in items.py (This defines name and allowed domain)

#spider to crawl yahoofinance DMS India

from scrapy.spider import BaseSpider

class DMSSpider(BaseSpider):
name = "DMSE"
allowed_domains = ["finance.yahoo.com"]
start_urls = ["http://finance.yahoo.com/q?s=DAIMX"]

def parse(self, response):
filename = response.url.split("/")[-2]
open(filename,'wb').write(response.body)

Next go to spider directory
–> create a new spider which would crawl the website

#spider to crawl yahoofinance DMS India
#written by Tanmay
# Does not do any fancy thing.

from scrapy.spider import BaseSpider
from scrapy.selector import HtmlXPathSelector
from yahooFinance.items import TickItem

class DMSSpider(BaseSpider):
name = "DMS"
allowed_domains = ["finance.yahoo.com"]
start_urls = ["http://finance.yahoo.com/q?s=DAIMX"]

def parse(self, response):
print 'in part'
hxs = HtmlXPathSelector(response)
sites = hxs.select('//span[contains(@id,"l10")]')
for site in sites:
item = TickItem()
item['name'] = [u'DMSTick']
item['value'] = site.select('text()').extract()
return item

Go to main project directory and start crawler by typing

scrapy crawl DMS

Two things (that I Can think of right now) can cause error at this point are :

#1 . Unknown command: crawl Use “scrapy” to see available commands More commands are available in project mode means you are not in the correct directory.

You have to be in a directory where your scrapy.cfg file is

#2. No crawler name “xyz” is found.

Make sure that the spider file that you just created has the “name” property exactly xyz or whatever you actually wanted

——————————————————-

Ok so if everything runs perfectly till now then that means basic functionality is completed.

—————————————————–

Next time will start beefing up the items.py to extract real data

# Define here the models for your scraped items
#
# See documentation in:
# http://doc.scrapy.org/topics/items.html

from scrapy.item import Item, Field

class TickItem(Item):
    # define the fields for your item here like:
    name = Field()
    value = Field()

Run it and save resuls in a JSON (DMSTick) by

scrapy crawl DMS -o DMSTick.json -t json

The final Json that this code produce is :

[[[{"name": ["demo"], "value": ["8.72"]}]
[{"name": ["demo"], "value": ["8.72"]}]

Solutions of course at http://courses.cms.caltech.edu/cs11/ in Erlang,C++,C,C#,Python

Python Implementation
-------------------------------------------------------------------
# -- Lab 1 for python
from math import sqrt
from string import upper
def welcome():
 print "Hello world of python \n"

def solve_quadEq (A,B,C):
 try:
 A+B+C
 except:
 print "Please enter all numerics\n"
 return -1
 try:
 SqrPart = ( B * B - 4 * A * C)
 except:
 print " This equation has complex roots "
 return -1
 Ans1 = (-B + SqrPart) / (2 * A)
 Ans2 = (-B - SqrPart) / (2 * A)
 return [Ans1,Ans2]

def fileLen (File):
 try:
 f = open(File,"r")
 except:
 print "Can not open file /n"
 return -1
 i = 0
 while (f.readline()) :
 i += 1
 print "Number of lines is \n"
 f.close()
 return i

def file_to_upper(File,Dest):
 try:
 F = open(File,"r")
 D = open(Dest,"w")
 except:
 print "Error in I/O file \n"
 return -1
 while (F.readline()):
 D.write(upper(F.readline()))
 F.close()
 D.close()
 return 0

Erlang Implementation 

-------------------------------------------------------------------

% -- Lab 1 for python
-module(erlangPythonLAB11).
-compile(export_all).

welcome() ->
 io:format("Hello World of Erlang~n").
solve_quadEq( A, B, C) ->
 try math:sqrt( B * B - 4 * A * C) of
 SqrPart ->
 Ans1 = (-B + SqrPart) / (2 * A),
 Ans2 = (-B - SqrPart) / (2 * A),
 {Ans1,Ans2}
 catch
 error:What -> io:format("Can not happend because of..~s~n",[What])
 end.

fileLen(File) ->
 try file:open(File,read) of
 _ -> {ok,S}= file:open(File,read),
 {ok,Dest}=file:open("Temp.xml",write),
 file_to_upper(S,Dest),
 {ok,S1}= file:open(File,read),
 getLine(S1,0)

catch
 error:What -> io:format("Can not open file ~s~n",[What])
 end.
getLine(S,I) ->
 case
 io:get_line(S,'') of
 eof -> file:close(S),
 {eof,I};
 _A->
 getLine(S,I+1)
 end.
file_to_upper(S,Dest) ->
 case io:get_line(S,'') of
 eof -> file:close(S),
 file:close(Dest),
 ok;
 A ->
 io:format(Dest,"~p,~n",[string:to_upper(A)]),
 file_to_upper(S,Dest)
 end.

Tower of Brahma (MisQuoted as Hanoi) in Erlang

----------------------------------------------------------------------------------

</pre>
-module(erlangPythonLAB24).
-compile(export_all).
move ([],[]) ->
{[],[]};
move (A,[]) ->
LastA = lists:last(A),
A1 = lists:delete(LastA,A),
{A1,[LastA]};
move ([],B) ->
{B1,A}=move(B,[]),
{A,B1};
move(A,B) ->
LastA = lists:last(A),
LastB = lists:last(B),
if LastA
B1=lists:delete(LastB,B),
A1 =lists:append(A,[LastB]),
{A1,B1};
true ->
A1=lists:delete(LastA,A),
B1 =lists:append(B,[LastA]),
{A1,B1}
end.
brahmaTower(1) ->
{[],[],[1]};
brahmaTower(P) ->
A = lists:seq(1,P),
B =[],
C = [],
brahmaTowerH(A,B,C,P,0).
brahmaTowerH([],[],_C,_Nvar,Iter) ->
io:format("Done and number of iterations taken are ~p~n",[Iter]);
brahmaTowerH(A,B,C,Nvar,Iter) ->
{A1,B1}=move(A,B),
{A2,C1} = move (A1,C),
{B2,C2} = move(B1,C1),
io:format("A is: ~p~nB is: ~p~nC is: ~p~n~n",[A2,B2,C2]),
brahmaTowerH(A2,B2,C2,Nvar,Iter+1).

Python Implementation

</pre>
def move(A,B):
 SizeA = len(A)
 SizeB = len(B)
 if SizeA !=0 and SizeB != 0:
 try:
 LastA = A[-1]
 LastB = B[-1]
 except:
 print "Can not access last elements \n"
 return -1
 if LastA<LastB:
 A.append(B.pop())
 return [A,B]
 else:
 B.append(A.pop())
 return [A,B]
 else:
 if SizeA==0:
 if SizeB==0:
 return [[],[]]
 else:
 A.append(B.pop())
 return [A,B]
 else:
 B.append(A.pop())
 return [A,B]

def brahmaTower(N):
 A = range(1,N+1)
 B =[]
 C =[]
 brahmaTowerH(A,B,C,0)

def brahmaTowerH(A,B,C,Iter):
 if len(A) == 0 and len(B) == 0:
 print "Done in ", Iter , "steps \n"
 else:
 [A1,B1] = move(A,B)
 [A2,C1] = move(A1,C)
 [B2,C2] = move(B,C)
 print "A is:",A2, "\n", "B is:",B2,"\n","C is:",C2,"\n"
 brahmaTowerH(A2,B2,C2,Iter+1)